1 module miao..string.shift_or;
2 
3 /*!
4 Main features
5     uses bitwise techniques;
6     efficient if the pattern length is no longer than the memory-word size of the machine;
7     preprocessing phase in O(m + sigma) time and space complexity;
8     searching phase in O(n) time complexity (independent from the alphabet size and the pattern length);
9     adapts easily to approximate string matching.
10 */
11 
12 @trusted:
13 
14 import miao.common.check;
15 import miao.common.util;
16 
17 struct Shift_or_searcher(PatRange, CorpusRange = PatRange) 
18     if (isValidParam!(PatRange, CorpusRange)) {
19 public:
20     this(in PatRange pattern)
21     in {
22 		import std.string;
23 
24         debug assert(pattern.length <= word_len, 
25 			format("@Shift_or_searcher: pattern length %s must less than word size %s", 
26 			   pattern.length, 
27 			   word_len)
28 			);
29     }
30     body {
31         pat_len_ = pattern.length;
32         if (pat_len_ > 0) {
33             s_ = preprocess_(pattern);
34             lim_ = ~cast(underly_type)0 << (pat_len_ - 1);
35         }
36     }
37     
38     int search(in CorpusRange corpus) const
39 	out(result) {
40 			assert(result == -1 || (0 <= result && result < corpus.length));
41 	}
42 	body {
43         if (corpus.length == 0 || pat_len_ == 0) return -1;
44 		if (corpus.length < pat_len_) return -1;
45 		
46 		return search_(corpus);
47     }
48 
49 private:
50     alias underly_type = ulong;
51 
52     enum word_len = underly_type.sizeof * 8;
53     enum ascii_size = cast(uint)0xff;
54     
55     int search_(in CorpusRange corpus) const
56     {
57         auto state = ~cast(underly_type)0;
58         
59         foreach (const cursor, letter; corpus) {
60             state = (state << 1) | s_[letter];
61             if (state < lim_) {
62                 assert(cursor - pat_len_ + 1 >= 0);
63                 return cursor - pat_len_ + 1;
64             }
65         }
66     
67         return -1;
68     }
69     
70     underly_type[] preprocess_(in PatRange pattern) const
71     in {
72         assert(0 < pat_len_ && pat_len_ <= word_len);
73         assert(s_ == null);
74         assert(lim_ == 0);
75     }
76     body {
77         auto s = new underly_type[ascii_size];
78         foreach (ref x; s) {
79             x = ~cast(underly_type)0;
80         }
81         
82         //! record each letter's occurence on the pattern string
83         foreach (const cursor, letter; pattern) {
84 			s[letter] &= ~(cast(underly_type)0x1 << cursor);
85 		}
86 
87         return s;
88     }
89     
90 private:
91     immutable underly_type[] s_;
92     immutable underly_type lim_;
93     immutable int pat_len_;
94 }
95 
96 alias shift_or_search = GenerateFunction!Shift_or_searcher;
97 
98 unittest {
99     import miao.string.test;
100 
101     testAll!(Shift_or_searcher, shift_or_search)();
102 }