1 module miao..string.boyer_moore; 2 3 /*! 4 Main features 5 performs the comparisons from right to left; 6 preprocessing phase in O(m+sigma) time and space complexity; 7 searching phase in O(mn) time complexity; 8 3n text character comparisons in the worst case when searching for a non periodic pattern; 9 O(n / m) best performance. 10 11 Description 12 The Boyer-Moore algorithm is considered as the most efficient string-matching algorithm in usual applications. A simplified version of it or the entire algorithm is often implemented in text editors for the «search» and «substitute» commands. 13 The algorithm scans the characters of the pattern from right to left beginning with the rightmost one. In case of a mismatch (or a complete match of the whole pattern) it uses two precomputed functions to shift the window to the right. These two shift functions are called the good-suffix shift (also called matching shift and the bad-character shift (also called the occurrence shift). 14 */ 15 16 @trusted: 17 18 import miao.common.util; 19 import miao.common.check; 20 import miao.common.skip_table; 21 22 import std.algorithm : max; 23 24 struct Boyer_moore_searcher(PatRange, CorpusRange = PatRange) 25 if (isValidParam!(PatRange, CorpusRange)) { 26 public: 27 this(in PatRange pattern) 28 { 29 pattern_ = pattern; 30 31 if (pattern_.length > 0) { 32 bad_char_ = build_bm_table(pattern_); 33 good_suffix_ = build_gs_table_(); 34 } 35 } 36 37 int search(in CorpusRange corpus) const 38 out(result) { 39 assert(result == -1 || (0 <= result && result < corpus.length)); 40 } 41 body { 42 if (corpus.length == 0 || pattern_.length == 0) return -1; 43 if (corpus.length < pattern_.length) return -1; 44 45 return search_(corpus); 46 } 47 48 private: 49 int search_(in CorpusRange corpus) const 50 { 51 immutable cmp_len = corpus.length - pattern_.length; 52 53 for (auto win_pos = 0; win_pos <= cmp_len; ) { 54 const window = corpus[win_pos .. win_pos + pattern_.length]; 55 56 //! compare backward 57 int cursor = pattern_.length - 1; 58 for (; cursor >= 0 && window[cursor] == pattern_[cursor]; --cursor) { 59 /* empty */ 60 } 61 62 if (cursor < 0) { 63 return win_pos; 64 } 65 else { 66 //! sliding window 67 const bc = window[cursor]; 68 win_pos += max(bad_char_[bc] - (pattern_.length - 1 - cursor), good_suffix_[cursor]); 69 } 70 } 71 72 return -1; 73 } 74 75 private: 76 pure auto build_gs_table_() const nothrow 77 { 78 auto gs = new int[pattern_.length]; 79 80 gs[] = 1; 81 82 if (pattern_.length >= 2) { 83 } 84 85 return gs; 86 } 87 88 private: 89 immutable Skip_table!(ValueType!PatRange) bad_char_; 90 immutable int[] good_suffix_; 91 const PatRange pattern_; 92 } 93 94 alias boyer_moore_search = GenerateFunction!Boyer_moore_searcher; 95 96 unittest { 97 import miao.string.test; 98 99 testAll!(Boyer_moore_searcher, boyer_moore_search)(); 100 }