summaryrefslogtreecommitdiffstats
path: root/02-data_structures/03-hash_tables/03-hash_substring
diff options
context:
space:
mode:
Diffstat (limited to '02-data_structures/03-hash_tables/03-hash_substring')
-rw-r--r--02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp57
1 files changed, 54 insertions, 3 deletions
diff --git a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
index 3a3680a..6c9e457 100644
--- a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
+++ b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
@@ -1,5 +1,6 @@
#include <iostream>
#include <string>
+#include <cmath>
#include <vector>
using std::string;
@@ -15,6 +16,39 @@ Data read_input() {
return data;
}
+unsigned long long poly_hash(const string& s, size_t sz, unsigned long x, unsigned long p)
+{
+ unsigned long long h = 0;
+ /* std::cout << "hash " << x << " " << p << std::endl; */
+ for (size_t i = sz; i-- > 0; ) {
+ h = (h * x + s[i]) % p;
+ /* std::cout << " " << s[i] << " " << h << std::endl; */
+ }
+
+ return h;
+}
+
+std::vector<unsigned long long> precompute_hashes(const string& T, const string& P, unsigned long long p, unsigned long long x)
+{
+ std::vector<unsigned long long> H(T.size() - P.size() + 1);
+
+ H[H.size() - 1] = poly_hash(&T[H.size() - 1], P.size(), x, p);
+
+ unsigned long long y = 1;
+ for (size_t i = 1; i <= P.size(); i++)
+ y = (y * x) % p;
+ /* std::cout << "y : " << y << std::endl; */
+
+ for (size_t i = H.size() - 1; i-- > 0; )
+ H[i] = ((T[i] + x * H[i + 1] - (y * T[i + P.size()] % p)) + p) % p;
+
+ /* for (size_t i = 0; i < H.size(); i++) */
+ /* std::cout << H[i] << " "; */
+ /* std::cout << std::endl; */
+
+ return H;
+}
+
void print_occurrences(const std::vector<int>& output) {
for (size_t i = 0; i < output.size(); ++i)
std::cout << output[i] << " ";
@@ -23,10 +57,27 @@ void print_occurrences(const std::vector<int>& output) {
std::vector<int> get_occurrences(const Data& input) {
const string& s = input.pattern, t = input.text;
+
+ static const unsigned long x = 6;
+ static const unsigned long p = 1000000007;
+
+ unsigned long long h = poly_hash(s, s.size(), x, p);
+
+ auto hashes = precompute_hashes(t, s, p, x);
+
std::vector<int> ans;
- for (size_t i = 0; i + s.size() <= t.size(); ++i)
- if (t.substr(i, s.size()) == s)
- ans.push_back(i);
+ for (size_t i = 0; i < hashes.size(); i++)
+ if (hashes[i] == h) {
+ bool match = true;
+ for (size_t j = 0; j < s.size(); j++) {
+ if (s[j] != t[i + j]) {
+ match = false;
+ break;
+ }
+ }
+ if (match)
+ ans.push_back(i);
+ }
return ans;
}