Algorithms : complete 02-data_structures 03-hash_tables

author: Jérémy Zurcher <jeremy@asynk.ch> 2016-11-13 22:22:51 +0100
committer: Jérémy Zurcher <jeremy@asynk.ch> 2016-11-13 22:22:51 +0100
commit: b524eca827b182b0a51952557e36c945c779f848 (patch)
tree: bf14635a2c502826b198d07138ba038e489068e0 /02-data_structures/03-hash_tables
parent: e6b725d69c07d4a582d0aadc915def99d872dfde (diff)
download: coursera-b524eca827b182b0a51952557e36c945c779f848.zip
coursera-b524eca827b182b0a51952557e36c945c779f848.tar.gz
3 files changed, 81 insertions, 41 deletions
diff --git a/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp b/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp
index 182c36f..6f5e9af 100644
--- a/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp
+++ b/02-data_structures/03-hash_tables/01-phone_book/phone_book.cpp
@@ -32,37 +32,22 @@ void write_responses(const vector<string>& result) {
 
 vector<string> process_queries(const vector<Query>& queries) {
     vector<string> result;
-    // Keep list of all existing (i.e. not deleted yet) contacts.
-    vector<Query> contacts;
-    for (size_t i = 0; i < queries.size(); ++i)
+
+    vector<string> contacts(10000000);
+
+    for (size_t i = 0; i < queries.size(); ++i) {
+        int j = queries[i].number;
         if (queries[i].type == "add") {
-            bool was_founded = false;
-            // if we already have contact with such number,
-            // we should rewrite contact's name
-            for (size_t j = 0; j < contacts.size(); ++j)
-                if (contacts[j].number == queries[i].number) {
-                    contacts[j].name = queries[i].name;
-                    was_founded = true;
-                    break;
-                }
-            // otherwise, just add it
-            if (!was_founded)
-                contacts.push_back(queries[i]);
+            contacts[j] = queries[i].name;
         } else if (queries[i].type == "del") {
-            for (size_t j = 0; j < contacts.size(); ++j)
-                if (contacts[j].number == queries[i].number) {
-                    contacts.erase(contacts.begin() + j);
-                    break;
-                }
+            contacts[j] = "";
         } else {
-            string response = "not found";
-            for (size_t j = 0; j < contacts.size(); ++j)
-                if (contacts[j].number == queries[i].number) {
-                    response = contacts[j].name;
-                    break;
-                }
-            result.push_back(response);
+            if (contacts[j].empty())
+                result.push_back("not found");
+            else
+                result.push_back(contacts[j]);
         }
+    }
     return result;
 }
 
diff --git a/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp b/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp
index 8af47d2..64a1975 100644
--- a/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp
+++ b/02-data_structures/03-hash_tables/02-hash_chains/hash_chains.cpp
@@ -15,7 +15,7 @@ struct Query {
 class QueryProcessor {
     int bucket_count;
     // store all strings in one vector
-    vector<string> elems;
+    vector<vector<string>> elems;
     size_t hash_func(const string& s) const {
         static const size_t multiplier = 263;
         static const size_t prime = 1000000007;
@@ -26,7 +26,9 @@ class QueryProcessor {
     }
 
 public:
-    explicit QueryProcessor(int bucket_count): bucket_count(bucket_count) {}
+    explicit QueryProcessor(int bucket_count): bucket_count(bucket_count) {
+        elems.reserve(bucket_count);
+    }
 
     Query readQuery() const {
         Query query;
@@ -45,20 +47,22 @@ public:
     void processQuery(const Query& query) {
         if (query.type == "check") {
             // use reverse order, because we append strings to the end
-            for (int i = static_cast<int>(elems.size()) - 1; i >= 0; --i)
-                if (hash_func(elems[i]) == query.ind)
-                    std::cout << elems[i] << " ";
+            vector<string> *list = &elems[query.ind];
+            for (int i = static_cast<int>(list->size()) - 1; i >= 0; --i)
+                    std::cout << (*list)[i] << " ";
             std::cout << "\n";
         } else {
-            vector<string>::iterator it = std::find(elems.begin(), elems.end(), query.s);
+            size_t h = hash_func(query.s);
+            vector<string> *list = &elems[h];
+            vector<string>::iterator it = std::find(list->begin(), list->end(), query.s);
             if (query.type == "find")
-                writeSearchResult(it != elems.end());
+                writeSearchResult(it != list->end());
             else if (query.type == "add") {
-                if (it == elems.end())
-                    elems.push_back(query.s);
+                if (it == list->end())
+                    list->push_back(query.s);
             } else if (query.type == "del") {
-                if (it != elems.end())
-                    elems.erase(it);
+                if (it != list->end())
+                    list->erase(it);
             }
         }
     }
diff --git a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
index 3a3680a..6c9e457 100644
--- a/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
+++ b/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
@@ -1,5 +1,6 @@
 #include <iostream>
 #include <string>
+#include <cmath>
 #include <vector>
 
 using std::string;
@@ -15,6 +16,39 @@ Data read_input() {
     return data;
 }
 
+unsigned long long poly_hash(const string& s, size_t sz, unsigned long x, unsigned long p)
+{
+    unsigned long long h = 0;
+    /* std::cout << "hash " << x << " " << p << std::endl; */
+    for (size_t i = sz; i-- > 0; ) {
+        h = (h * x + s[i]) % p;
+        /* std::cout << " " << s[i] << " " << h << std::endl; */
+    }
+
+    return h;
+}
+
+std::vector<unsigned long long> precompute_hashes(const string& T, const string& P, unsigned long long p, unsigned long long x)
+{
+    std::vector<unsigned long long> H(T.size() - P.size() + 1);
+
+    H[H.size() - 1] = poly_hash(&T[H.size() - 1], P.size(), x, p);
+
+    unsigned long long y = 1;
+    for (size_t i = 1; i <= P.size(); i++)
+        y = (y * x) % p;
+    /* std::cout << "y : " << y << std::endl; */
+
+    for (size_t i = H.size() - 1; i-- > 0; )
+        H[i] = ((T[i] + x * H[i + 1] - (y * T[i + P.size()] % p)) + p) % p;
+
+    /* for (size_t i = 0; i < H.size(); i++) */
+    /*     std::cout << H[i] << " "; */
+    /* std::cout << std::endl; */
+
+    return H;
+}
+
 void print_occurrences(const std::vector<int>& output) {
     for (size_t i = 0; i < output.size(); ++i)
         std::cout << output[i] << " ";
@@ -23,10 +57,27 @@ void print_occurrences(const std::vector<int>& output) {
 
 std::vector<int> get_occurrences(const Data& input) {
     const string& s = input.pattern, t = input.text;
+
+    static const unsigned long x = 6;
+    static const unsigned long p = 1000000007;
+
+    unsigned long long h = poly_hash(s, s.size(), x, p);
+
+    auto hashes = precompute_hashes(t, s, p, x);
+
     std::vector<int> ans;
-    for (size_t i = 0; i + s.size() <= t.size(); ++i)
-        if (t.substr(i, s.size()) == s)
-            ans.push_back(i);
+    for (size_t i = 0; i < hashes.size(); i++)
+        if (hashes[i] == h) {
+            bool match = true;
+            for (size_t j = 0; j < s.size(); j++) {
+                if (s[j] != t[i + j]) {
+                    match = false;
+                    break;
+                }
+            }
+            if (match)
+                ans.push_back(i);
+        }
     return ans;
 }
author	Jérémy Zurcher <jeremy@asynk.ch>	2016-11-13 22:22:51 +0100
committer	Jérémy Zurcher <jeremy@asynk.ch>	2016-11-13 22:22:51 +0100
commit	b524eca827b182b0a51952557e36c945c779f848 (patch)
tree	bf14635a2c502826b198d07138ba038e489068e0 /02-data_structures/03-hash_tables
parent	e6b725d69c07d4a582d0aadc915def99d872dfde (diff)
download	coursera-b524eca827b182b0a51952557e36c945c779f848.zip coursera-b524eca827b182b0a51952557e36c945c779f848.tar.gz