From b524eca827b182b0a51952557e36c945c779f848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Zurcher?= Date: Sun, 13 Nov 2016 22:22:51 +0100 Subject: Algorithms : complete 02-data_structures 03-hash_tables --- .../03-hash_tables/01-phone_book/phone_book.cpp | 39 +++++---------- .../03-hash_tables/02-hash_chains/hash_chains.cpp | 26 +++++----- .../03-hash_substring/hash_substring.cpp | 57 ++++++++++++++++++++-- 3 files changed, 81 insertions(+), 41 deletions(-) diff --git a/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp b/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp index 182c36f..6f5e9af 100644 --- a/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp +++ b/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp @@ -32,37 +32,22 @@ void write_responses(const vector& result) { vector process_queries(const vector& queries) { vector result; - // Keep list of all existing (i.e. not deleted yet) contacts. - vector contacts; - for (size_t i = 0; i < queries.size(); ++i) + + vector contacts(10000000); + + for (size_t i = 0; i < queries.size(); ++i) { + int j = queries[i].number; if (queries[i].type == "add") { - bool was_founded = false; - // if we already have contact with such number, - // we should rewrite contact's name - for (size_t j = 0; j < contacts.size(); ++j) - if (contacts[j].number == queries[i].number) { - contacts[j].name = queries[i].name; - was_founded = true; - break; - } - // otherwise, just add it - if (!was_founded) - contacts.push_back(queries[i]); + contacts[j] = queries[i].name; } else if (queries[i].type == "del") { - for (size_t j = 0; j < contacts.size(); ++j) - if (contacts[j].number == queries[i].number) { - contacts.erase(contacts.begin() + j); - break; - } + contacts[j] = ""; } else { - string response = "not found"; - for (size_t j = 0; j < contacts.size(); ++j) - if (contacts[j].number == queries[i].number) { - response = contacts[j].name; - break; - } - result.push_back(response); + if (contacts[j].empty()) + result.push_back("not found"); + else + result.push_back(contacts[j]); } + } return result; } diff --git a/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp b/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp index 8af47d2..64a1975 100644 --- a/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp +++ b/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp @@ -15,7 +15,7 @@ struct Query { class QueryProcessor { int bucket_count; // store all strings in one vector - vector elems; + vector> elems; size_t hash_func(const string& s) const { static const size_t multiplier = 263; static const size_t prime = 1000000007; @@ -26,7 +26,9 @@ class QueryProcessor { } public: - explicit QueryProcessor(int bucket_count): bucket_count(bucket_count) {} + explicit QueryProcessor(int bucket_count): bucket_count(bucket_count) { + elems.reserve(bucket_count); + } Query readQuery() const { Query query; @@ -45,20 +47,22 @@ public: void processQuery(const Query& query) { if (query.type == "check") { // use reverse order, because we append strings to the end - for (int i = static_cast(elems.size()) - 1; i >= 0; --i) - if (hash_func(elems[i]) == query.ind) - std::cout << elems[i] << " "; + vector *list = &elems[query.ind]; + for (int i = static_cast(list->size()) - 1; i >= 0; --i) + std::cout << (*list)[i] << " "; std::cout << "\n"; } else { - vector::iterator it = std::find(elems.begin(), elems.end(), query.s); + size_t h = hash_func(query.s); + vector *list = &elems[h]; + vector::iterator it = std::find(list->begin(), list->end(), query.s); if (query.type == "find") - writeSearchResult(it != elems.end()); + writeSearchResult(it != list->end()); else if (query.type == "add") { - if (it == elems.end()) - elems.push_back(query.s); + if (it == list->end()) + list->push_back(query.s); } else if (query.type == "del") { - if (it != elems.end()) - elems.erase(it); + if (it != list->end()) + list->erase(it); } } } diff --git a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp index 3a3680a..6c9e457 100644 --- a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp +++ b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp @@ -1,5 +1,6 @@ #include #include +#include #include using std::string; @@ -15,6 +16,39 @@ Data read_input() { return data; } +unsigned long long poly_hash(const string& s, size_t sz, unsigned long x, unsigned long p) +{ + unsigned long long h = 0; + /* std::cout << "hash " << x << " " << p << std::endl; */ + for (size_t i = sz; i-- > 0; ) { + h = (h * x + s[i]) % p; + /* std::cout << " " << s[i] << " " << h << std::endl; */ + } + + return h; +} + +std::vector precompute_hashes(const string& T, const string& P, unsigned long long p, unsigned long long x) +{ + std::vector H(T.size() - P.size() + 1); + + H[H.size() - 1] = poly_hash(&T[H.size() - 1], P.size(), x, p); + + unsigned long long y = 1; + for (size_t i = 1; i <= P.size(); i++) + y = (y * x) % p; + /* std::cout << "y : " << y << std::endl; */ + + for (size_t i = H.size() - 1; i-- > 0; ) + H[i] = ((T[i] + x * H[i + 1] - (y * T[i + P.size()] % p)) + p) % p; + + /* for (size_t i = 0; i < H.size(); i++) */ + /* std::cout << H[i] << " "; */ + /* std::cout << std::endl; */ + + return H; +} + void print_occurrences(const std::vector& output) { for (size_t i = 0; i < output.size(); ++i) std::cout << output[i] << " "; @@ -23,10 +57,27 @@ void print_occurrences(const std::vector& output) { std::vector get_occurrences(const Data& input) { const string& s = input.pattern, t = input.text; + + static const unsigned long x = 6; + static const unsigned long p = 1000000007; + + unsigned long long h = poly_hash(s, s.size(), x, p); + + auto hashes = precompute_hashes(t, s, p, x); + std::vector ans; - for (size_t i = 0; i + s.size() <= t.size(); ++i) - if (t.substr(i, s.size()) == s) - ans.push_back(i); + for (size_t i = 0; i < hashes.size(); i++) + if (hashes[i] == h) { + bool match = true; + for (size_t j = 0; j < s.size(); j++) { + if (s[j] != t[i + j]) { + match = false; + break; + } + } + if (match) + ans.push_back(i); + } return ans; } -- cgit v1.1-2-g2b99