#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stream that receives lines whose generated reading turned out identical
   to the unannotated line; opened by main(). */
FILE *discard;
typedef unsigned char uchar_t;

/* Flag values stored in state[]; 0 means the character was never listed. */
#define CERTAIN 1
#define UNCERTAIN 2

/* Per-character flags, indexed by (code - 0x8080).  Codes from 0x8080 up to
   0xffff are accepted, so the table needs one slot for each of them. */
char state[0xffff - 0x8080 + 1];

/* Combine the two bytes at s into one code, first byte in the high half. */
#define kanji_code(s) ((s)[0]*256 + (s)[1])

/* Current input line with every <...> annotation removed; filled by main().
   Sized like the line buffer so a stripped line always fits. */
uchar_t orig_kanji[BUFSIZ];

void
output (header, header_yomi, str)
     uchar_t *header, *header_yomi, *str;
{
  uchar_t this_header[30], kanji_yomi[30], *this_trailer;
  unsigned code;
  uchar_t *delim = strchr(str, '<');
  uchar_t *rdelim = strchr(str, '>');
  uchar_t new_header[30];
  uchar_t new_header_yomi[30];

  if (!delim) {
    char buf[200];
    strcpy(buf, header_yomi);
    strcat(buf, str);
    if (!strcmp(orig_kanji, buf)) {
      fprintf(discard, "%s\n", orig_kanji);
    } else {
      printf("%s%s	%s%s\n", header_yomi, str, header, str);
    }
    return;
  }
  strncpy(this_header, str, delim-str-2); this_header[delim-str-2] = 0;
  code = kanji_code(delim-2);
  assert(rdelim);
  strncpy(kanji_yomi, delim+1, rdelim-delim-1); kanji_yomi[rdelim-delim-1] = 0;
  this_trailer = rdelim+1;

  sprintf(new_header, "%s%s%c%c", header, this_header, delim[-2], delim[-1]);
  if (state[code-0x8080] == 0) {
    sprintf(new_header_yomi, "%s%s%s", header_yomi, this_header, kanji_yomi);
    output(new_header, new_header_yomi, this_trailer);
  } else if (state[code-0x8080] & CERTAIN) {
    sprintf(new_header_yomi, "%s%s%c%c", header_yomi, this_header,
	    delim[-2], delim[-1]);
    output(new_header, new_header_yomi, this_trailer);
  } else {
    sprintf(new_header_yomi, "%s%s%s", header_yomi, this_header, kanji_yomi);
    output(new_header, new_header_yomi, this_trailer);
    sprintf(new_header_yomi, "%s%s%c%c", header_yomi, this_header,
	    delim[-2], delim[-1]);
    output(new_header, new_header_yomi, this_trailer);
  }
}


/*
 * Read one line from f and return the two-byte code of its fourth field.
 *
 * A line is expected to hold four whitespace-separated fields: a decimal
 * number, a word, a hexadecimal number and the character itself.
 *
 * Returns 0 at end of input, (unsigned) -1 when the line does not contain
 * all four fields, and otherwise the first two bytes of the fourth field
 * combined the same way kanji_code() does.
 */
unsigned
read_next_code (f)
    FILE *f;
{
  char buf[BUFSIZ];
  /* A field can never be longer than the line it came from, so buffers of
     the same size as buf make the unbounded %s conversions safe. */
  char stroke[BUFSIZ];
  unsigned char kanji[BUFSIZ];
  int dummy1;
  unsigned dummy2;

  if (!fgets(buf, sizeof buf, f))
    return 0;
  if (sscanf(buf, "%d %s %x %s", &dummy1, stroke, &dummy2, (char *) kanji) != 4)
    return -1;
  return kanji[0] * 256u + kanji[1];
}

void
read_certain_file(path, val)
     char *path;
     int val;
{
  FILE *f;
  int linenum;
  char buf[BUFSIZ];
  if (!(f = fopen(path, "r"))) {
    perror(path);
    exit(1);
  }
  linenum = 0;

  while (fgets(buf, BUFSIZ, f)) {
    uchar_t *s = strtok(buf, " \t\n");
    unsigned code = s ? kanji_code(s) : 0;
    linenum++;
    if (!(code >= 0x8080 && code <= 0xffff)) {
      fprintf(stderr, "%s:%d:ignoring %s.\n", path, linenum, s);
    } else {
      state[code-0x8080] = val;
    }
    while (s = strtok(NULL, " \t\n")) {
      code = kanji_code(s);
      if (!(code >= 0x8080 && code <= 0xffff)) {
	fprintf(stderr, "%s:%d:ignoring %s.\n", path, linenum, s);
      } else {
	state[code-0x8080] = val;
      }
    }
  }
  fclose(f);
}

int
main (argc, argv)
     int argc;
     char **argv;
{
  FILE *f;
  uchar_t buf[BUFSIZ];
  unsigned code;
  int line;
  if (argc < 3) {
    fprintf(stderr, "reduce dictionary\n");
    fprintf(stderr, "Usage: reduce file1 file2 <dictionary\n");
    fprintf(stderr, " file1: certainly memorized t-code\n");
    fprintf(stderr, " file2: uncertainly memorized t-code\n");
    exit(1);
  }

  discard = fopen("discard", "w");
  assert(discard);

  read_certain_file(argv[1], CERTAIN);
  read_certain_file(argv[2], UNCERTAIN);

  f = stdin;
  while (fgets(buf, BUFSIZ, f)) {
    uchar_t *delim;
    uchar_t header[30];
    uchar_t *dest = orig_kanji, *src = buf;
    buf[strlen(buf)-1] = '\0';
    while (*src) {
      if (*src == '<') {
	while (*src != '>') src++;
	src++;
      }
      *dest++ = *src++;
    }
    *dest = 0;

    delim = strchr(buf, '<');
    assert(delim >= buf+2); /* '<'$B$NA0$K$OA43Q0lJ8;z0J>e$J$-$c$$$1$J$$(B */
    memset(header, 0, sizeof header);
    strncpy(header, buf, (delim - buf - 2));

    output(header, header, delim-2);
  }
  exit(0);
}
