summaryrefslogtreecommitdiffstats
path: root/02-data_structures/03-hash_tables/03-hash_substring/hash_substring.cpp
blob: 6c9e4578ca238284721c3c942ba80640e9f147e3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <iostream>
#include <string>
#include <cmath>
#include <vector>

using std::string;
typedef unsigned long long ull;

struct Data {
    string pattern, text;
};

Data read_input() {
    Data data;
    std::cin >> data.pattern >> data.text;
    return data;
}

unsigned long long poly_hash(const string& s, size_t sz, unsigned long x, unsigned long p)
{
    unsigned long long h = 0;
    /* std::cout << "hash " << x << " " << p << std::endl; */
    for (size_t i = sz; i-- > 0; ) {
        h = (h * x + s[i]) % p;
        /* std::cout << " " << s[i] << " " << h << std::endl; */
    }

    return h;
}

std::vector<unsigned long long> precompute_hashes(const string& T, const string& P, unsigned long long p, unsigned long long x)
{
    std::vector<unsigned long long> H(T.size() - P.size() + 1);

    H[H.size() - 1] = poly_hash(&T[H.size() - 1], P.size(), x, p);

    unsigned long long y = 1;
    for (size_t i = 1; i <= P.size(); i++)
        y = (y * x) % p;
    /* std::cout << "y : " << y << std::endl; */

    for (size_t i = H.size() - 1; i-- > 0; )
        H[i] = ((T[i] + x * H[i + 1] - (y * T[i + P.size()] % p)) + p) % p;

    /* for (size_t i = 0; i < H.size(); i++) */
    /*     std::cout << H[i] << " "; */
    /* std::cout << std::endl; */

    return H;
}

void print_occurrences(const std::vector<int>& output) {
    for (size_t i = 0; i < output.size(); ++i)
        std::cout << output[i] << " ";
    std::cout << "\n";
}

std::vector<int> get_occurrences(const Data& input) {
    const string& s = input.pattern, t = input.text;

    static const unsigned long x = 6;
    static const unsigned long p = 1000000007;

    unsigned long long h = poly_hash(s, s.size(), x, p);

    auto hashes = precompute_hashes(t, s, p, x);

    std::vector<int> ans;
    for (size_t i = 0; i < hashes.size(); i++)
        if (hashes[i] == h) {
            bool match = true;
            for (size_t j = 0; j < s.size(); j++) {
                if (s[j] != t[i + j]) {
                    match = false;
                    break;
                }
            }
            if (match)
                ans.push_back(i);
        }
    return ans;
}


int main() {
    std::ios_base::sync_with_stdio(false);
    print_occurrences(get_occurrences(read_input()));
    return 0;
}