/***********************************************************************/
/*                                                                     */
/*                           Corrector                                 */
/*                                                                     */
/*                   Damien Doligez, INRIA Rocquencourt                */
/*                                                                     */
/*  Copyright 1997 Institut National de Recherche en Informatique et   */
/*  Automatique.  Distributed only by permission.                      */
/*                                                                     */
/***********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dict.h"

void usage(void);

trans *automaton;
long allocated_length;
long useful_length;
long first_try;

void init_auto(void)
{
  allocated_length = 1024;
  automaton = (trans *) malloc (allocated_length * sizeof(trans));
  if (automaton == NULL) {
    fprintf(stderr, "Not enough memory.\n");
    exit(2);
  }
  memset(automaton,0,allocated_length * sizeof(trans));
  useful_length = 0;
  first_try = 0;
}

long add_state (trans t[256], int num_arrows, int final)
{
    long i,j;
    long old_length;

    if (useful_length + 256 > allocated_length) {
#ifdef DEBUG
      fprintf(stderr,"Growing automaton\n");
#endif
      old_length = allocated_length;
      allocated_length = allocated_length * 2;
      automaton = (trans *) realloc(automaton, allocated_length * sizeof(trans));
      if (automaton == NULL) {
	fprintf(stderr, "Not enough memory.\n");
	exit(2);
      }
      memset(&(automaton[old_length]),0,
	     (allocated_length - old_length) * sizeof(trans));
    };
    if (first_try < useful_length - 512)
      first_try = useful_length - 512;
    while (automaton[first_try] & 0x100)
      ++first_try;
    for (i=first_try;1; i++) {
      if (automaton[i] & 0x100)
	goto not_good;
      for (j=0; j<num_arrows; j++) {
	if (automaton[i+(t[j] & 0xFF)] & 0xFF)
	  goto not_good;
      };
      break;
    not_good: ;
    };
    automaton[i] |= 0x100;
    if (final) automaton[i] |= 0x200;
    for (j=0; j < num_arrows; j++) {
      automaton [i + (t[j] & 0xFF)] |= t[j];
    };
    if (useful_length < i + 256)
      useful_length = i + 256;
    return i;
  }


/** Hash consing of states **/
#define Empty 0x80000000
struct bucket {
  unsigned long key;
  unsigned long state;
};
struct bucket *hash_table;
unsigned long hash_table_size;
unsigned long hash_table_fill;
unsigned long states_made;

void hash_init(void)
{
  unsigned long i;
    
  hash_table_size = 997;
  hash_table = (struct bucket *) malloc (hash_table_size  * sizeof(struct bucket));
  if (hash_table == NULL) {
    fprintf(stderr,"Not enough memory.\n");
    exit(2);
  };
  for (i=0; i < hash_table_size; i++)
    hash_table[i].state = Empty;
  hash_table_fill = 0;
  states_made = 0;
}

int verify_state (trans t[256], int num_arrows, int final, unsigned long st)
{
  int i;
  trans tr;
  trans arrows[256];
  int n;
  
  if (!!final != !!is_final(st)) return 0;
  n = 0;
  for (i=1;i<256;i++) {
    tr = transition (st,i);
    if (is_bad(tr,i)) {
      arrows[i]=-1;
    }
    else {
      ++n;
      arrows[i] = tr;
    };
  };
  if (n != num_arrows) return 0;
  for (i=0; i < num_arrows; i++) {
    if (get_state(t[i]) != get_state(arrows[t[i] & 0xFF])) return 0;
  };
  return 1;
}

long make_state(trans t[256], int num_arrows, int final)
{
  unsigned long h = 0;
  unsigned long i,j;
  
  ++states_made;
  if(hash_table_fill >= hash_table_size / 2) {
    struct bucket *new_table;
    unsigned long new_size = hash_table_size * 2 + 1;
#ifdef DEBUG
    fprintf(stderr,"Growing hash table\n");
#endif
    
    new_table = (struct bucket *)malloc(new_size * sizeof(struct bucket));
    if (new_table == NULL) {
      fprintf(stderr, "Not enough memory.\n");
      exit(2);
    };
    for (i=0;i<new_size;i++)
      new_table[i].state = Empty;
    for (i=0; i< hash_table_size; i++) {
      if (hash_table[i].state != Empty) {
	for (j=hash_table[i].key % new_size;
	     new_table[j].state != Empty; j++) {
	  if (j>= new_size) j -= new_size;
	};
	new_table[j].key = hash_table[i].key;
	new_table[j].state = hash_table[i].state;
      };
    };
    free(hash_table);
    hash_table = new_table;
    hash_table_size = new_size;
  };
  for (i=0; i < num_arrows; i++)
    h ^= t[i];
  if (final)
    h ^= 0x12345678;
  for (i = h % hash_table_size; hash_table[i].state != Empty; i++) {
    if (i>=hash_table_size)
      i-=hash_table_size;
    if (hash_table[i].key == h
	&& verify_state(t,num_arrows, final, hash_table[i].state))
      return hash_table[i].state;
  };
  ++hash_table_fill;
  hash_table[i].key = h;
  hash_table[i].state = add_state (t, num_arrows,final);
  return hash_table[i].state;
}

void output_automaton(unsigned long init_state)
{
  fwrite(&init_state,sizeof(init_state),1,stdout);
  fwrite(automaton,sizeof(automaton[0]),useful_length,stdout);
}

int read_word(char *buf)
{
  int c,n;
  
  n = 0;
  while (1) {
    c = getchar();
    if (c == EOF) return 1;
    if (c == '\n') break;
    if (n < Max_len) buf[n++] = c;
  }
  buf[n] = '\0';
  return 0;
}

long automat(void)
{
  int i,j,n;
  trans tr[Max_len][256];
  int tr_cur[Max_len];
  unsigned char lab[Max_len];
  int final[Max_len];
  char buf1[Max_len+1];
  char buf2[Max_len+1];
  char *prev, *cur, *t;

  tr_cur[0]=0;
  prev = buf1;
  cur = buf2;
  prev[0]=0;
  final[0]=0;
  n=0;
  while (1) {
    if (read_word(cur)) break;
    for (i=0;prev[i];i++) {
      if (prev[i] != cur[i]) break;
    };
    if (i==0) {
      fprintf(stderr,"%c", cur[0]);
      fflush(stderr);
    };
    for (j=n; j>i; j--) {
      tr[j-1][tr_cur[j-1]] =
	(make_state(tr[j],tr_cur[j],final[j]) << 10) + lab[j];
      ++tr_cur[j-1];
    };
    for (j=i;j<strlen(cur);j++) {
      tr_cur[j+1]=0;
      lab[j+1]=cur[j];
      final[j+1]= 0;
    };
    n=j;
    final[n]=1;
    t=prev;
    prev=cur;
    cur=t;
  };
  fprintf(stderr,"\n");
  for (j=n; j>0; j--) {
    tr[j-1][tr_cur[j-1]] =
      (make_state(tr[j],tr_cur[j],final[j]) << 10) + lab[j];
    ++tr_cur[j-1];
  };
  return make_state(tr[0],tr_cur[0],final[0]);
}

int main(int argc, char * argv[])
{
  unsigned long init_state;
  if (argc != 1) usage();
  init_auto();
  hash_init();
  init_state = automat();
  output_automaton(init_state);
  exit(0);
}

void usage (void)
{
  fprintf(stderr,"Usage: make_dict <word_list_file > automaton_file\n");
  exit (2);
}
