1 module miao..string.horspool;
2 
3 /*!
4 Main features
5 simplification of the Boyer-Moore algorithm;
6 easy to implement;
7 preprocessing phase in O(m+sigma) time and O(sigma) space complexity;
8 searching phase in O(mn) time complexity;
9 the average number of comparisons for one text character is between 1/sigma and 2/(sigma+1).
10 
11 Description
12 The bad-character shift used in the Boyer-Moore algorithm (see chapter Boyer-Moore algorithm) is not very efficient for small alphabets, but when the alphabet is large compared with the length of the pattern, as it is often the case with the ASCII table and ordinary searches made under a text editor, it becomes very useful.
13 Using it alone produces a very efficient algorithm in practice. Horspool proposed to use only the bad-character shift of the rightmost character of the window to compute the shifts in the Boyer-Moore algorithm.
14 The preprocessing phase is in O(m+sigma) time and O(sigma) space complexity.
15 The searching phase has a quadratic worst case but it can be proved that the average number of comparisons for one text character is between 1/sigma and 2/(sigma+1).
16 */
17 
18 @trusted:
19 
20 import miao.common.skip_table;
21 import miao.common.check;
22 import miao.common.util;
23 
24 version(unittest) import std.stdio;
25 
26 struct Horspool_searcher(PatRange, CorpusRange = PatRange) 
27     if (isValidParam!(PatRange, CorpusRange)) {
28 public:
29     this(in PatRange pattern)
30 	{
31 		pattern_ = pattern;
32 		if (pattern_.length > 0) {
33 			skip_ = build_bm_table(pattern_);
34 		}
35 	}
36 	
37 	int search(in CorpusRange corpus) const
38 	out(result) {
39 		assert(result == -1 || (0 <= result && result < corpus.length));
40 	}
41 	body {
42 		if (corpus.length == 0 || pattern_.length == 0) return -1;
43 		if (corpus.length < pattern_.length) return -1;
44 		
45 		return search_(corpus);
46 	}
47 	
48 private:
49 	int search_(in CorpusRange corpus) const
50 	{
51 		immutable cmp_len = corpus.length - pattern_.length;
52 		
53 		int window_pos = 0;
54 		
55 		while (window_pos <= cmp_len) {
56 			const window = corpus[window_pos .. window_pos + pattern_.length];
57             
58             int cursor = pattern_.length - 1;
59 			
60 			//! find mismatch
61 			while (cursor >= 0 && window[cursor] == pattern_[cursor]) {
62 				--cursor;
63 			}
64 			
65 			if (cursor == -1) {
66 				return window_pos;
67 			}
68 			else {
69 				// sliding window
70 				const bad_char = window[$ - 1];
71 				window_pos += skip_[bad_char];
72 			}
73 		}
74 		
75 		return -1;
76 	}
77 	
78 private:
79 	immutable Skip_table!(ValueType!PatRange) skip_;
80 	const PatRange pattern_;
81 }
82 
83 alias horspool_search = GenerateFunction!Horspool_searcher;
84 
85 unittest {
86 	import miao.string.test;
87 	
88     testAll!(Horspool_searcher, horspool_search)();
89 }