1 module miao..string.dfa; 2 3 /*! 4 Main features 5 builds the minimal deterministic automaton recognizing the language Sigma*x; 6 extra space in O(msigma) if the automaton is stored in a direct access table; 7 preprocessing phase in O(msigma) time complexity; 8 searching phase in O(n) time complexity if the automaton is stored in a direct access table, O(nlog(sigma)) otherwise. 9 10 Description 11 Searching a word x with an automaton consists first in building the minimal Deterministic Finite Automaton (DFA) A(x) recognizing the language Sigma*x. 12 The DFA A(x) =(Q, q0, T, E) recognizing the language Sigma*x is defined as follows: 13 Q is the set of all the prefixes of x: Q={epsilon, x[0], x[0 .. 1], ... , x[0 .. m-2], x}; 14 q0=epsilon; 15 T={x}; 16 for q in Q (q is a prefix of x) and a in Sigma, (q, a, qa) is in E if and only if qa is also a prefix of x, otherwise (q, a, p) is in E such that p is the longest suffix of qa which is a prefix of x. 17 The DFA A(x) can be constructed in O(m+sigma) time and O(msigma) space. 18 Once the DFA A(x) is build, searching for a word x in a text y consists in parsing the text y with the DFA A(x) beginning with the initial state q0. Each time the terminal state is encountered an occurrence of x is reported. 19 The searching phase can be performed in O(n) time if the automaton is stored in a direct access table, in O(nlog(sigma)) otherwise. 20 */ 21 22 @trusted: 23 24 import miao.common.check; 25 import miao.common.util; 26 27 //! \fix generalized to all kinds of charset 28 struct Dfa_searcher(PatRange, CorpusRange = PatRange) 29 if (isValidParam!(PatRange, CorpusRange)) { 30 public: 31 this(in PatRange pattern) 32 { 33 pat_len_ = pattern.length; 34 if (pat_len_ > 0) { 35 terminal_ = pat_len_; 36 dfa_ = build_dfa_(pattern); 37 } 38 } 39 40 int search(in CorpusRange corpus) const 41 out(result) { 42 assert(result == -1 || (0 <= result && result < corpus.length)); 43 } 44 body { 45 if (corpus.length == 0 || pat_len_ == 0) return -1; 46 if (corpus.length < pat_len_) return -1; 47 48 return search_(corpus); 49 } 50 51 private: 52 enum ascii_size = cast(uint)0xff; 53 enum init_state = 0; 54 55 int[][] build_dfa_(in PatRange pattern) const 56 in { 57 assert(pat_len_ > 0); 58 } 59 body { 60 auto dfa = new int[][pat_len_ + 1]; 61 62 auto state = init_state; 63 64 for (auto i = 0; i < pat_len_; ++i) { 65 const target = i + 1; 66 const event = pattern[i]; 67 68 if (dfa[state] == null) { 69 dfa[state] = new int[ascii_size]; 70 } 71 72 assert(dfa[target] == null); 73 74 auto old_target = dfa[state][event]; 75 76 dfa[state][event] = target; 77 dfa[target] = new int[ascii_size]; 78 dfa[target][] = dfa[old_target][]; 79 80 state = target; 81 } 82 83 assert(state == terminal_); 84 85 return dfa; 86 } 87 88 int search_(inout const CorpusRange corpus) const 89 { 90 for (auto state = init_state, cursor = 0; cursor < corpus.length; ++cursor) { 91 const event = corpus[cursor]; 92 93 assert(event < ascii_size, "@Dfa_searcher: only ascii are supported for the moment"); 94 95 if ((state = transit_(state, event)) == terminal_) { 96 assert(cursor + 1 >= pat_len_); 97 return cursor - pat_len_ + 1; 98 } 99 } 100 101 return -1; 102 } 103 104 int transit_(int state, int event) const 105 in { 106 assert(0 <= state && state < terminal_); 107 assert(0 <= event && event < ascii_size); 108 } 109 body { 110 const table = dfa_[state]; 111 return table == null? 0: table[event]; 112 } 113 114 private: 115 immutable int[][] dfa_; 116 immutable int terminal_; 117 immutable int pat_len_; 118 } 119 120 alias dfa_search = GenerateFunction!Dfa_searcher; 121 122 unittest { 123 import miao.string.test; 124 125 testAll!(Dfa_searcher, dfa_search)(); 126 }