(***********************************************************************) (* *) (* Corrector *) (* *) (* Damien Doligez and Francois Rouaix, INRIA Rocquencourt *) (* *) (* Copyright 1997 Institut National de Recherche en Informatique et *) (* Automatique. Distributed only by permission. *) (* *) (***********************************************************************) (* The notion of word *) module type Words = sig type t type letter (* must support = *) val length : t -> int val get : t -> int -> letter val implode : letter list -> t end (* The dictionary *) module type DictAutom = sig type t type state type label val initial : t -> state val is_final : t -> state -> bool val transition : t -> state -> label -> state val transitions : t -> state -> label list end module Make(W : Words)(D : DictAutom with type label = W.letter) = struct (* Word modifications *) type delta = | Ins of W.letter | Del of W.letter | Copy of W.letter (* Rebuild word from list of modifications * Note: we get modifications in reverse order *) let implode l = let rec getletters accu = function | [] -> accu | Ins c :: l -> getletters (c :: accu) l | Del _ :: l -> getletters accu l | Copy c :: l -> getletters (c :: accu) l in W.implode (getletters [] l) (* Cost parameters *) type costs = { tr : int; (* transposition cost *) del : int; (* delete cost *) ins : int; (* insert cost *) subst : W.letter -> W.letter -> int; (* param. substitution cost *) max : W.t -> int } let default_costs = { tr = 80; del = 100; ins = 100; subst = (fun _ _ -> 130); max = (fun w -> 18 * (W.length w - 1) + 100) } let suggest dict costs candidate = (* macros *) let is_final = D.is_final dict and transition = D.transition dict and transitions = D.transitions dict in let max_i = W.length candidate - 1 in (* adjust costs *) let max_cost = costs.max candidate in let res = ref [] in let add_res l cost = res := (implode l , cost) :: !res in (* i : current pos in candidate * state : current state in automata * cost : current cost for this path * letters : delta operations in reverse order *) let rec walk i state cost letters = if cost <= max_cost then begin if i > max_i then begin (* end of candidate reached *) if is_final state then add_res letters cost; (* continue to copy from suffixes *) List.iter (fun c -> check i (transition state c) (cost+costs.ins) (Ins c :: letters)) (transitions state) end else begin let c = W.get candidate i in let nexts = transitions state in (* this one is matching, continue in this subtree *) if List.mem c nexts then check (i+1) (transition state c) cost (Copy c::letters); (* there are non matching transitions *) if nexts <> [c] then (* try to insert them *) List.iter (fun c' -> if c = c' then () else check i (transition state c') (cost+costs.ins) (Ins c'::letters)) nexts; (* or try to remove the mismatching letter *) if nexts <> [c] || is_final state then check (i+1) state (cost+costs.del) (Del c :: letters) end end (* exclude redundant cases *) and check i state cost = function | Del x :: Ins y :: _ -> () | Del x :: Copy _ :: Ins y :: _ when x = y -> () | Ins x :: Del y :: _ when x = y -> () | Copy x :: Del y :: _ when x = y -> () | Copy x :: Ins y :: _ when x = y -> () | (Ins x :: Del y :: _) as letters -> walk i state (cost-costs.ins-costs.del+costs.subst y x) letters | (Ins x :: Copy _ :: Del y :: _) as letters when x = y -> walk i state (cost-costs.ins-costs.del+costs.tr) letters | (Ins x :: Copy y :: l) as letters when x = y && let rec p = function | Copy y :: l when x = y -> p l | Del z :: l -> true | _ -> false in p l -> walk i state (cost-costs.ins-costs.del+costs.subst y x) letters | letters -> walk i state cost letters in walk 0 (D.initial dict) 0 []; max_cost, !res let check dict word = let l = W.length word in let rec w pos state = if pos = l then D.is_final dict state else w (pos+1) (D.transition dict state (W.get word pos)) in try w 0 (D.initial dict) with Not_found -> false let f dict costs word = if check dict word then 0, [word, 0] else suggest dict costs word end