1 module miao..string.karp_rabin;
2 
3 /*!
4  * uses an hashing function;
5  * preprocessing phase in O(m) time complexity and constant space;
6  * searching phase in O(mn) time complexity;
7  * O(n+m) expected running time.
8  *
9  * \description
10 Hashing provides a simple method to avoid a quadratic number of character comparisons in most practical situations. Instead of checking at each position of the text if the pattern occurs, it seems to be more efficient to check only if the contents of the window “looks like” the pattern. In order to check the resemblance between these two words an hashing function is used.
11 To be helpful for the string matching problem an hashing function hash should have the following properties:
12  	efficiently computable;
13  	highly discriminating for strings;
14  	hash(y[j+1 .. j+m]) must be easily computable from hash(y[j .. j+m-1]) and y[j+m]:
15   hash(y[j+1 .. j+m])= rehash(y[j], y[j+m], hash(y[j .. j+m-1]).
16 For a word w of length m let hash(w) be defined as follows:
17 hash(w[0 .. m-1])=(w[0]*2m-1+ w[1]*2m-2+···+ w[m-1]*20) mod q
18 where q is a large number.
19 Then, rehash(a,b,h)= ((h-a*2m-1)*2+b) mod q
20 The preprocessing phase of the Karp-Rabin algorithm consists in computing hash(x). It can be done in constant space and O(m) time.
21 During searching phase, it is enough to compare hash(x) with hash(y[j .. j+m-1]) for 0 leq j < n-m. If an equality is found, it is still necessary to check the equality x=y[j .. j+m-1] character by character.
22 The time complexity of the searching phase of the Karp-Rabin algorithm is O(mn) (when searching for am in an for instance). Its expected number of text character comparisons is O(n+m).
23  */
24 
25 import std.math;
26  
27 @trusted:
28 
29 import miao.common.check;
30 import miao.common.util;
31 
32 struct Karp_rabin_searcher(PatRange, CorpusRange = PatRange) 
33     if (isValidParam!(PatRange, CorpusRange)) {
34 public:
35 	this(in PatRange pattern)
36 	{
37 		/* preprocessing */
38 		pattern_ = pattern;
39 		pattern_length_ = pattern.length;
40 		hashed_pattern_ = hash_(pattern);
41 		if (pattern_length_ > 0) {
42 			d_ = pow(2, pattern_length_ - 1);
43 		}
44 	}
45 	
46 	int search(in CorpusRange corpus) const
47 	out(result) {
48 		assert(result == -1 || (0 <= result && result < corpus.length));
49 	}
50 	body {
51 		if (corpus.length == 0 || pattern_.length == 0) return -1;
52 		if (corpus.length < pattern_.length) return -1;
53 		
54 		return search_(corpus);
55 	}
56 	
57 private:
58 	int search_(in CorpusRange corpus) const
59 	{		
60 		const compare_length = corpus.length - pattern_length_;
61 
62 		auto hashed_window = hash_(corpus);
63 		auto window_pos = 0;
64 
65 		for (; window_pos < compare_length; ++window_pos) {
66 			if (hashed_pattern_ == hashed_window && 
67 				pattern_ == corpus[window_pos .. window_pos + pattern_length_]) {
68 				return window_pos;
69 			}
70 			
71 			hashed_window = rehash_(corpus[window_pos], 
72 									corpus[window_pos + pattern_length_], 
73 									hashed_window);
74 		}
75 
76 		if (hashed_pattern_ == hashed_window && 
77 			pattern_ == corpus[window_pos .. $]) {
78 			return window_pos;
79 		}
80 
81 		return -1;
82 	}
83 
84 	int hash_(Rng)(in Rng s) const
85 	{
86 		int code = 0;
87 		
88 		for (auto i = 0; i < pattern_length_; ++i) {
89 			code = (code << 1) + s[i];
90 		}
91 		
92 		return code;
93 	}
94 	
95 	pure int rehash_(in int oldv, in int newv, in int hcode) const nothrow
96 	{
97 		assert(pattern_length_ > 0);
98 		return ((hcode - oldv * d_) << 1) + newv;
99 	}
100 	
101 private:
102 	const PatRange pattern_;
103 	immutable int hashed_pattern_;
104 	immutable int pattern_length_;
105 	immutable int d_;
106 }
107 
108 alias karp_rabin_search = GenerateFunction!Karp_rabin_searcher;
109 
110 unittest {
111 	import miao.string.test;
112 	
113     testAll!(Karp_rabin_searcher, karp_rabin_search)();
114 }