diff options
Diffstat (limited to '04-algorithms_on_strings/01-suffix_trees/04-suffix_tree')
-rw-r--r-- | 04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp | 143 |
1 files changed, 136 insertions, 7 deletions
diff --git a/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp b/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp index 5d9e2f1..96fdf83 100644 --- a/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp +++ b/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp @@ -1,7 +1,10 @@ #include <iostream> +#include <algorithm> +#include <cassert> #include <map> #include <string> #include <vector> +#include <stack> using std::cin; using std::cout; @@ -9,14 +12,140 @@ using std::endl; using std::map; using std::string; using std::vector; +using std::stack; -// Build a suffix tree of the string text and return a vector -// with all of the labels of its edges (the corresponding -// substrings of the text) in any order. -vector<string> ComputeSuffixTreeEdges(const string& text) { - vector<string> result; - // Implement this function yourself - return result; +int const Letters = 5; +int const NA = -1; + +struct Node +{ + int from; + int len; + int childs [Letters]; + + Node () : from(0), len(0) + { + std::fill(childs, childs + Letters, NA); + } + + Node(int f, int l) : from(f), len(l) + { + std::fill(childs, childs + Letters, NA); + } + + void print(const string& text) + { + std::cout << from << ";" << len << " "; + for(int i = from; i < (from + len); i++) + std::cout << text[i]; + std::cout << " - A:" << childs[0] << " C:" << childs[1] + << " G:" << childs[2] << " T:" << childs[3] << " $:" << childs[4]<< std::endl; + } +}; + +struct SuffixTree +{ + const string& text; + vector<Node> nodes; + + SuffixTree(const string& t) : text(t) + { + nodes.push_back(Node()); + for (int i = 0; i < t.size(); i++) + add(i, text.size() - i, 0); + } + + void add(int from, int len, int into) + { + int i = toInt(text[from]); + int n = nodes[into].childs[i]; + if (n == NA) { + nodes.push_back(Node(from, len)); + nodes[into].childs[i] = nodes.size() - 1; + } else { + insert(from, len, n, into); + } + } + + void insert(int from, int len, int n, int parent) + { + Node &node = nodes[n]; + + int i = from; + int j = node.from; + int to = from + std::min(len, node.len); + + for( ; (text[i] == text[j] && i < to); i++, j++) { } + + if (i < to) { + int matchLen = j - node.from; + int s = nodes.size(); + // new match node + nodes.push_back(Node(node.from, matchLen)); + // shrink n to mismatch part + nodes[n].from = j; + nodes[n].len = nodes[n].len - matchLen; + // parent[from] -> match[j] -> n + nodes[parent].childs[toInt(text[from])] = s; + nodes[s].childs[toInt(text[j])] = n; + // new mismatch node + nodes.push_back(Node(i, len - matchLen)); + // match[i] -> mismatch + nodes[s].childs[toInt(text[i])] = s + 1; + } else if (i == to) { + add(i, len - i + from, n); + } else if (i == from) { + std::cout << "missmatch at 0 !!!!!!!!!!!!!!!!!!" << std::endl; + nodes[n].print(text); + std::cout << &text[from]<< std::endl; + std::cout << &text[i]<< std::endl; + } + } + + void travers(vector<string> &r) + { + stack<int> s; + s.push(0); + while (!s.empty()) { + int n = s.top(); + s.pop(); + Node &node = nodes[n]; + + if (n != 0) + r.push_back(text.substr(node.from, node.len)); + + for(int i = 0; i < Letters; i++) { + int nn = node.childs[i]; + if (nn != NA) + s.push(nn); + } + } + } + + int toInt(char c) + { + switch (c) + { + case 'A': return 0; break; + case 'C': return 1; break; + case 'G': return 2; break; + case 'T': return 3; break; + case '$': return 4; break; + default: assert (false); return -1; + } + } + + +}; + +vector<string> ComputeSuffixTreeEdges(const string& text) +{ + vector<string> result; + + SuffixTree t(text); + t.travers(result); + + return result; } int main() { |