diff options
Diffstat (limited to '02-data_structures/03-hash_tables/03-hash_substring')
-rw-r--r-- | 02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp | 57 |
1 files changed, 54 insertions, 3 deletions
diff --git a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp index 3a3680a..6c9e457 100644 --- a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp +++ b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp @@ -1,5 +1,6 @@ #include <iostream> #include <string> +#include <cmath> #include <vector> using std::string; @@ -15,6 +16,39 @@ Data read_input() { return data; } +unsigned long long poly_hash(const string& s, size_t sz, unsigned long x, unsigned long p) +{ + unsigned long long h = 0; + /* std::cout << "hash " << x << " " << p << std::endl; */ + for (size_t i = sz; i-- > 0; ) { + h = (h * x + s[i]) % p; + /* std::cout << " " << s[i] << " " << h << std::endl; */ + } + + return h; +} + +std::vector<unsigned long long> precompute_hashes(const string& T, const string& P, unsigned long long p, unsigned long long x) +{ + std::vector<unsigned long long> H(T.size() - P.size() + 1); + + H[H.size() - 1] = poly_hash(&T[H.size() - 1], P.size(), x, p); + + unsigned long long y = 1; + for (size_t i = 1; i <= P.size(); i++) + y = (y * x) % p; + /* std::cout << "y : " << y << std::endl; */ + + for (size_t i = H.size() - 1; i-- > 0; ) + H[i] = ((T[i] + x * H[i + 1] - (y * T[i + P.size()] % p)) + p) % p; + + /* for (size_t i = 0; i < H.size(); i++) */ + /* std::cout << H[i] << " "; */ + /* std::cout << std::endl; */ + + return H; +} + void print_occurrences(const std::vector<int>& output) { for (size_t i = 0; i < output.size(); ++i) std::cout << output[i] << " "; @@ -23,10 +57,27 @@ void print_occurrences(const std::vector<int>& output) { std::vector<int> get_occurrences(const Data& input) { const string& s = input.pattern, t = input.text; + + static const unsigned long x = 6; + static const unsigned long p = 1000000007; + + unsigned long long h = poly_hash(s, s.size(), x, p); + + auto hashes = precompute_hashes(t, s, p, x); + std::vector<int> ans; - for (size_t i = 0; i + s.size() <= t.size(); ++i) - if (t.substr(i, s.size()) == s) - ans.push_back(i); + for (size_t i = 0; i < hashes.size(); i++) + if (hashes[i] == h) { + bool match = true; + for (size_t j = 0; j < s.size(); j++) { + if (s[j] != t[i + j]) { + match = false; + break; + } + } + if (match) + ans.push_back(i); + } return ans; } |