summaryrefslogtreecommitdiffstats
path: root/02-data_structures/03-hash_tables
diff options
context:
space:
mode:
authorJérémy Zurcher <jeremy@asynk.ch>2016-11-13 22:22:51 +0100
committerJérémy Zurcher <jeremy@asynk.ch>2016-11-13 22:22:51 +0100
commitb524eca827b182b0a51952557e36c945c779f848 (patch)
treebf14635a2c502826b198d07138ba038e489068e0 /02-data_structures/03-hash_tables
parente6b725d69c07d4a582d0aadc915def99d872dfde (diff)
downloadcoursera-b524eca827b182b0a51952557e36c945c779f848.zip
coursera-b524eca827b182b0a51952557e36c945c779f848.tar.gz
Algorithms : complete 02-data_structures 03-hash_tables
Diffstat (limited to '02-data_structures/03-hash_tables')
-rw-r--r--02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp39
-rw-r--r--02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp26
-rw-r--r--02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp57
3 files changed, 81 insertions, 41 deletions
diff --git a/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp b/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp
index 182c36f..6f5e9af 100644
--- a/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp
+++ b/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp
@@ -32,37 +32,22 @@ void write_responses(const vector<string>& result) {
vector<string> process_queries(const vector<Query>& queries) {
vector<string> result;
- // Keep list of all existing (i.e. not deleted yet) contacts.
- vector<Query> contacts;
- for (size_t i = 0; i < queries.size(); ++i)
+
+ vector<string> contacts(10000000);
+
+ for (size_t i = 0; i < queries.size(); ++i) {
+ int j = queries[i].number;
if (queries[i].type == "add") {
- bool was_founded = false;
- // if we already have contact with such number,
- // we should rewrite contact's name
- for (size_t j = 0; j < contacts.size(); ++j)
- if (contacts[j].number == queries[i].number) {
- contacts[j].name = queries[i].name;
- was_founded = true;
- break;
- }
- // otherwise, just add it
- if (!was_founded)
- contacts.push_back(queries[i]);
+ contacts[j] = queries[i].name;
} else if (queries[i].type == "del") {
- for (size_t j = 0; j < contacts.size(); ++j)
- if (contacts[j].number == queries[i].number) {
- contacts.erase(contacts.begin() + j);
- break;
- }
+ contacts[j] = "";
} else {
- string response = "not found";
- for (size_t j = 0; j < contacts.size(); ++j)
- if (contacts[j].number == queries[i].number) {
- response = contacts[j].name;
- break;
- }
- result.push_back(response);
+ if (contacts[j].empty())
+ result.push_back("not found");
+ else
+ result.push_back(contacts[j]);
}
+ }
return result;
}
diff --git a/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp b/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp
index 8af47d2..64a1975 100644
--- a/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp
+++ b/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp
@@ -15,7 +15,7 @@ struct Query {
class QueryProcessor {
int bucket_count;
// store all strings in one vector
- vector<string> elems;
+ vector<vector<string>> elems;
size_t hash_func(const string& s) const {
static const size_t multiplier = 263;
static const size_t prime = 1000000007;
@@ -26,7 +26,9 @@ class QueryProcessor {
}
public:
- explicit QueryProcessor(int bucket_count): bucket_count(bucket_count) {}
+ explicit QueryProcessor(int bucket_count): bucket_count(bucket_count) {
+ elems.reserve(bucket_count);
+ }
Query readQuery() const {
Query query;
@@ -45,20 +47,22 @@ public:
void processQuery(const Query& query) {
if (query.type == "check") {
// use reverse order, because we append strings to the end
- for (int i = static_cast<int>(elems.size()) - 1; i >= 0; --i)
- if (hash_func(elems[i]) == query.ind)
- std::cout << elems[i] << " ";
+ vector<string> *list = &elems[query.ind];
+ for (int i = static_cast<int>(list->size()) - 1; i >= 0; --i)
+ std::cout << (*list)[i] << " ";
std::cout << "\n";
} else {
- vector<string>::iterator it = std::find(elems.begin(), elems.end(), query.s);
+ size_t h = hash_func(query.s);
+ vector<string> *list = &elems[h];
+ vector<string>::iterator it = std::find(list->begin(), list->end(), query.s);
if (query.type == "find")
- writeSearchResult(it != elems.end());
+ writeSearchResult(it != list->end());
else if (query.type == "add") {
- if (it == elems.end())
- elems.push_back(query.s);
+ if (it == list->end())
+ list->push_back(query.s);
} else if (query.type == "del") {
- if (it != elems.end())
- elems.erase(it);
+ if (it != list->end())
+ list->erase(it);
}
}
}
diff --git a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
index 3a3680a..6c9e457 100644
--- a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
+++ b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
@@ -1,5 +1,6 @@
#include <iostream>
#include <string>
+#include <cmath>
#include <vector>
using std::string;
@@ -15,6 +16,39 @@ Data read_input() {
return data;
}
+unsigned long long poly_hash(const string& s, size_t sz, unsigned long x, unsigned long p)
+{
+ unsigned long long h = 0;
+ /* std::cout << "hash " << x << " " << p << std::endl; */
+ for (size_t i = sz; i-- > 0; ) {
+ h = (h * x + s[i]) % p;
+ /* std::cout << " " << s[i] << " " << h << std::endl; */
+ }
+
+ return h;
+}
+
+std::vector<unsigned long long> precompute_hashes(const string& T, const string& P, unsigned long long p, unsigned long long x)
+{
+ std::vector<unsigned long long> H(T.size() - P.size() + 1);
+
+ H[H.size() - 1] = poly_hash(&T[H.size() - 1], P.size(), x, p);
+
+ unsigned long long y = 1;
+ for (size_t i = 1; i <= P.size(); i++)
+ y = (y * x) % p;
+ /* std::cout << "y : " << y << std::endl; */
+
+ for (size_t i = H.size() - 1; i-- > 0; )
+ H[i] = ((T[i] + x * H[i + 1] - (y * T[i + P.size()] % p)) + p) % p;
+
+ /* for (size_t i = 0; i < H.size(); i++) */
+ /* std::cout << H[i] << " "; */
+ /* std::cout << std::endl; */
+
+ return H;
+}
+
void print_occurrences(const std::vector<int>& output) {
for (size_t i = 0; i < output.size(); ++i)
std::cout << output[i] << " ";
@@ -23,10 +57,27 @@ void print_occurrences(const std::vector<int>& output) {
std::vector<int> get_occurrences(const Data& input) {
const string& s = input.pattern, t = input.text;
+
+ static const unsigned long x = 6;
+ static const unsigned long p = 1000000007;
+
+ unsigned long long h = poly_hash(s, s.size(), x, p);
+
+ auto hashes = precompute_hashes(t, s, p, x);
+
std::vector<int> ans;
- for (size_t i = 0; i + s.size() <= t.size(); ++i)
- if (t.substr(i, s.size()) == s)
- ans.push_back(i);
+ for (size_t i = 0; i < hashes.size(); i++)
+ if (hashes[i] == h) {
+ bool match = true;
+ for (size_t j = 0; j < s.size(); j++) {
+ if (s[j] != t[i + j]) {
+ match = false;
+ break;
+ }
+ }
+ if (match)
+ ans.push_back(i);
+ }
return ans;
}