1 module miao..string.boyer_moore;
2 
3 /*!
4 Main features
5     performs the comparisons from right to left;
6     preprocessing phase in O(m+sigma) time and space complexity;
7     searching phase in O(mn) time complexity;
8     3n text character comparisons in the worst case when searching for a non periodic pattern;
9     O(n / m) best performance.
10 
11 Description
12     The Boyer-Moore algorithm is considered as the most efficient string-matching algorithm in usual applications. A simplified version of it or the entire algorithm is often implemented in text editors for the «search» and «substitute» commands.
13     The algorithm scans the characters of the pattern from right to left beginning with the rightmost one. In case of a mismatch (or a complete match of the whole pattern) it uses two precomputed functions to shift the window to the right. These two shift functions are called the good-suffix shift (also called matching shift and the bad-character shift (also called the occurrence shift).
14 */
15 
16 @trusted:
17 
18 import miao.common.util;
19 import miao.common.check;
20 import miao.common.skip_table;
21 
22 import std.algorithm : max;
23 
24 struct Boyer_moore_searcher(PatRange, CorpusRange = PatRange) 
25     if (isValidParam!(PatRange, CorpusRange)) {
26 public:
27     this(in PatRange pattern)
28     {
29         pattern_ = pattern;
30 
31         if (pattern_.length > 0) {
32             bad_char_ = build_bm_table(pattern_);
33             good_suffix_ = build_gs_table_();
34         }
35     }
36 
37     int search(in CorpusRange corpus) const
38     out(result) {
39         assert(result == -1 || (0 <= result && result < corpus.length));
40     }
41     body {
42         if (corpus.length == 0 || pattern_.length == 0) return -1;
43         if (corpus.length < pattern_.length) return -1;
44 
45         return search_(corpus);
46     }
47 
48 private:
49     int search_(in CorpusRange corpus) const
50     {
51         immutable cmp_len = corpus.length - pattern_.length;
52 
53         for (auto win_pos = 0; win_pos <= cmp_len; ) {
54             const window = corpus[win_pos .. win_pos + pattern_.length];
55             
56             //! compare backward
57             int cursor = pattern_.length - 1;
58             for (; cursor >= 0 && window[cursor] == pattern_[cursor]; --cursor) {
59                 /* empty */
60             }
61 
62             if (cursor < 0) {
63                 return win_pos;
64             }
65             else {
66                 //! sliding window
67                 const bc = window[cursor];
68                 win_pos += max(bad_char_[bc] - (pattern_.length - 1 - cursor), good_suffix_[cursor]);
69             }
70         }
71 
72         return -1;
73     }
74 
75 private:
76     pure auto build_gs_table_() const nothrow
77     {
78         auto gs = new int[pattern_.length];
79         
80         gs[] = 1;
81 
82         if (pattern_.length >= 2) {
83         }
84 
85         return gs;
86     }
87 
88 private:
89     immutable Skip_table!(ValueType!PatRange) bad_char_;
90     immutable int[] good_suffix_;
91     const PatRange pattern_;
92 }
93 
94 alias boyer_moore_search = GenerateFunction!Boyer_moore_searcher;
95 
96 unittest {
97     import miao.string.test;
98 
99     testAll!(Boyer_moore_searcher, boyer_moore_search)();
100 }