summaryrefslogtreecommitdiffstats
path: root/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree
diff options
context:
space:
mode:
Diffstat (limited to '04-algorithms_on_strings/01-suffix_trees/04-suffix_tree')
-rw-r--r--04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp143
1 files changed, 136 insertions, 7 deletions
diff --git a/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp b/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp
index 5d9e2f1..96fdf83 100644
--- a/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp
+++ b/04-algorithms_on_strings/01-suffix_trees/04-suffix_tree/suffix_tree.cpp
@@ -1,7 +1,10 @@
#include <iostream>
+#include <algorithm>
+#include <cassert>
#include <map>
#include <string>
#include <vector>
+#include <stack>
using std::cin;
using std::cout;
@@ -9,14 +12,140 @@ using std::endl;
using std::map;
using std::string;
using std::vector;
+using std::stack;
-// Build a suffix tree of the string text and return a vector
-// with all of the labels of its edges (the corresponding
-// substrings of the text) in any order.
-vector<string> ComputeSuffixTreeEdges(const string& text) {
- vector<string> result;
- // Implement this function yourself
- return result;
+int const Letters = 5;
+int const NA = -1;
+
+struct Node
+{
+ int from;
+ int len;
+ int childs [Letters];
+
+ Node () : from(0), len(0)
+ {
+ std::fill(childs, childs + Letters, NA);
+ }
+
+ Node(int f, int l) : from(f), len(l)
+ {
+ std::fill(childs, childs + Letters, NA);
+ }
+
+ void print(const string& text)
+ {
+ std::cout << from << ";" << len << " ";
+ for(int i = from; i < (from + len); i++)
+ std::cout << text[i];
+ std::cout << " - A:" << childs[0] << " C:" << childs[1]
+ << " G:" << childs[2] << " T:" << childs[3] << " $:" << childs[4]<< std::endl;
+ }
+};
+
+struct SuffixTree
+{
+ const string& text;
+ vector<Node> nodes;
+
+ SuffixTree(const string& t) : text(t)
+ {
+ nodes.push_back(Node());
+ for (int i = 0; i < t.size(); i++)
+ add(i, text.size() - i, 0);
+ }
+
+ void add(int from, int len, int into)
+ {
+ int i = toInt(text[from]);
+ int n = nodes[into].childs[i];
+ if (n == NA) {
+ nodes.push_back(Node(from, len));
+ nodes[into].childs[i] = nodes.size() - 1;
+ } else {
+ insert(from, len, n, into);
+ }
+ }
+
+ void insert(int from, int len, int n, int parent)
+ {
+ Node &node = nodes[n];
+
+ int i = from;
+ int j = node.from;
+ int to = from + std::min(len, node.len);
+
+ for( ; (text[i] == text[j] && i < to); i++, j++) { }
+
+ if (i < to) {
+ int matchLen = j - node.from;
+ int s = nodes.size();
+ // new match node
+ nodes.push_back(Node(node.from, matchLen));
+ // shrink n to mismatch part
+ nodes[n].from = j;
+ nodes[n].len = nodes[n].len - matchLen;
+ // parent[from] -> match[j] -> n
+ nodes[parent].childs[toInt(text[from])] = s;
+ nodes[s].childs[toInt(text[j])] = n;
+ // new mismatch node
+ nodes.push_back(Node(i, len - matchLen));
+ // match[i] -> mismatch
+ nodes[s].childs[toInt(text[i])] = s + 1;
+ } else if (i == to) {
+ add(i, len - i + from, n);
+ } else if (i == from) {
+ std::cout << "missmatch at 0 !!!!!!!!!!!!!!!!!!" << std::endl;
+ nodes[n].print(text);
+ std::cout << &text[from]<< std::endl;
+ std::cout << &text[i]<< std::endl;
+ }
+ }
+
+ void travers(vector<string> &r)
+ {
+ stack<int> s;
+ s.push(0);
+ while (!s.empty()) {
+ int n = s.top();
+ s.pop();
+ Node &node = nodes[n];
+
+ if (n != 0)
+ r.push_back(text.substr(node.from, node.len));
+
+ for(int i = 0; i < Letters; i++) {
+ int nn = node.childs[i];
+ if (nn != NA)
+ s.push(nn);
+ }
+ }
+ }
+
+ int toInt(char c)
+ {
+ switch (c)
+ {
+ case 'A': return 0; break;
+ case 'C': return 1; break;
+ case 'G': return 2; break;
+ case 'T': return 3; break;
+ case '$': return 4; break;
+ default: assert (false); return -1;
+ }
+ }
+
+
+};
+
+vector<string> ComputeSuffixTreeEdges(const string& text)
+{
+ vector<string> result;
+
+ SuffixTree t(text);
+ t.travers(result);
+
+ return result;
}
int main() {