1 module miao..string.shift_or; 2 3 /*! 4 Main features 5 uses bitwise techniques; 6 efficient if the pattern length is no longer than the memory-word size of the machine; 7 preprocessing phase in O(m + sigma) time and space complexity; 8 searching phase in O(n) time complexity (independent from the alphabet size and the pattern length); 9 adapts easily to approximate string matching. 10 */ 11 12 @trusted: 13 14 import miao.common.check; 15 import miao.common.util; 16 17 struct Shift_or_searcher(PatRange, CorpusRange = PatRange) 18 if (isValidParam!(PatRange, CorpusRange)) { 19 public: 20 this(in PatRange pattern) 21 in { 22 import std.string; 23 24 debug assert(pattern.length <= word_len, 25 format("@Shift_or_searcher: pattern length %s must less than word size %s", 26 pattern.length, 27 word_len) 28 ); 29 } 30 body { 31 pat_len_ = pattern.length; 32 if (pat_len_ > 0) { 33 s_ = preprocess_(pattern); 34 lim_ = ~cast(underly_type)0 << (pat_len_ - 1); 35 } 36 } 37 38 int search(in CorpusRange corpus) const 39 out(result) { 40 assert(result == -1 || (0 <= result && result < corpus.length)); 41 } 42 body { 43 if (corpus.length == 0 || pat_len_ == 0) return -1; 44 if (corpus.length < pat_len_) return -1; 45 46 return search_(corpus); 47 } 48 49 private: 50 alias underly_type = ulong; 51 52 enum word_len = underly_type.sizeof * 8; 53 enum ascii_size = cast(uint)0xff; 54 55 int search_(in CorpusRange corpus) const 56 { 57 auto state = ~cast(underly_type)0; 58 59 foreach (const cursor, letter; corpus) { 60 state = (state << 1) | s_[letter]; 61 if (state < lim_) { 62 assert(cursor - pat_len_ + 1 >= 0); 63 return cursor - pat_len_ + 1; 64 } 65 } 66 67 return -1; 68 } 69 70 underly_type[] preprocess_(in PatRange pattern) const 71 in { 72 assert(0 < pat_len_ && pat_len_ <= word_len); 73 assert(s_ == null); 74 assert(lim_ == 0); 75 } 76 body { 77 auto s = new underly_type[ascii_size]; 78 foreach (ref x; s) { 79 x = ~cast(underly_type)0; 80 } 81 82 //! record each letter's occurence on the pattern string 83 foreach (const cursor, letter; pattern) { 84 s[letter] &= ~(cast(underly_type)0x1 << cursor); 85 } 86 87 return s; 88 } 89 90 private: 91 immutable underly_type[] s_; 92 immutable underly_type lim_; 93 immutable int pat_len_; 94 } 95 96 alias shift_or_search = GenerateFunction!Shift_or_searcher; 97 98 unittest { 99 import miao.string.test; 100 101 testAll!(Shift_or_searcher, shift_or_search)(); 102 }