diff options
| author | Jérémy Zurcher <jeremy@asynk.ch> | 2016-11-14 07:00:58 +0100 | 
|---|---|---|
| committer | Jérémy Zurcher <jeremy@asynk.ch> | 2016-11-14 07:00:58 +0100 | 
| commit | 7471f538c2203f266cb8002fb001183a9720a969 (patch) | |
| tree | 1ae9d4b366aa806391f2d40e2f458c5a7aa0f09e /04-algorithms_on_strings/04-suffix_array | |
| parent | eb0a213d040fb3b116ec030a6755216ce5e61ee9 (diff) | |
| download | coursera-7471f538c2203f266cb8002fb001183a9720a969.zip coursera-7471f538c2203f266cb8002fb001183a9720a969.tar.gz | |
Algorithms : add 04-algorithms_on_strings 04-suffix_array
Diffstat (limited to '04-algorithms_on_strings/04-suffix_array')
32 files changed, 315 insertions, 0 deletions
| diff --git a/04-algorithms_on_strings/04-suffix_array/00_suffix_array.pdf b/04-algorithms_on_strings/04-suffix_array/00_suffix_array.pdfBinary files differ new file mode 100644 index 0000000..3937c0f --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/00_suffix_array.pdf diff --git a/04-algorithms_on_strings/04-suffix_array/01-kmp/kmp.cpp b/04-algorithms_on_strings/04-suffix_array/01-kmp/kmp.cpp new file mode 100644 index 0000000..13121ae --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/01-kmp/kmp.cpp @@ -0,0 +1,29 @@ +#include <cstdio> +#include <iostream> +#include <string> +#include <vector> + +using std::cin; +using std::string; +using std::vector; + +// Find all occurrences of the pattern in the text and return a +// vector with all positions in the text (starting from 0) where  +// the pattern starts in the text. +vector<int> find_pattern(const string& pattern, const string& text) { +  vector<int> result; +  // Implement this function yourself +  return result; +} + +int main() { +  string pattern, text; +  cin >> pattern; +  cin >> text; +  vector<int> result = find_pattern(pattern, text); +  for (int i = 0; i < result.size(); ++i) { +    printf("%d ", result[i]); +  } +  printf("\n"); +  return 0; +} diff --git a/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample1 b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample1 new file mode 100644 index 0000000..66fbc8f --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample1 @@ -0,0 +1,2 @@ +TACG +GT diff --git a/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample1.a b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample1.a new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample1.a @@ -0,0 +1 @@ + diff --git a/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample2 b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample2 new file mode 100644 index 0000000..636224a --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample2 @@ -0,0 +1,2 @@ +ATA +ATATA diff --git a/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample2.a b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample2.a new file mode 100644 index 0000000..fa581ef --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample2.a @@ -0,0 +1 @@ +0 2  diff --git a/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample3 b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample3 new file mode 100644 index 0000000..0a08309 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample3 @@ -0,0 +1,2 @@ +ATAT +GATATATGCATATACTT diff --git a/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample3.a b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample3.a new file mode 100644 index 0000000..5ac4cba --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/01-kmp/tests/sample3.a @@ -0,0 +1 @@ +1 3 9  diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/suffix_array_long.cpp b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/suffix_array_long.cpp new file mode 100644 index 0000000..1dbbcc3 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/suffix_array_long.cpp @@ -0,0 +1,35 @@ +#include <algorithm> +#include <iostream> +#include <string> +#include <vector> +#include <utility> + +using std::cin; +using std::cout; +using std::endl; +using std::make_pair; +using std::pair; +using std::string; +using std::vector; + +// Build suffix array of the string text and +// return a vector result of the same length as the text +// such that the value result[i] is the index (0-based) +// in text where the i-th lexicographically smallest +// suffix of text starts. +vector<int> BuildSuffixArray(const string& text) { +  vector<int> result; +  // Implement this function yourself +  return result; +} + +int main() { +  string text; +  cin >> text; +  vector<int> suffix_array = BuildSuffixArray(text); +  for (int i = 0; i < suffix_array.size(); ++i) { +    cout << suffix_array[i] << ' '; +  } +  cout << endl; +  return 0; +} diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample1 b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample1 new file mode 100644 index 0000000..0ddcec2 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample1 @@ -0,0 +1 @@ +AAA$ diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample1.a b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample1.a new file mode 100644 index 0000000..9eaba84 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample1.a @@ -0,0 +1 @@ +3 2 1 0  diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample2 b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample2 new file mode 100644 index 0000000..b97cb09 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample2 @@ -0,0 +1 @@ +GAC$ diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample2.a b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample2.a new file mode 100644 index 0000000..ee9295b --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample2.a @@ -0,0 +1 @@ +3 1 2 0  diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample3 b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample3 new file mode 100644 index 0000000..37abc16 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample3 @@ -0,0 +1 @@ +GAGAGAGA$ diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample3.a b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample3.a new file mode 100644 index 0000000..9e98c55 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample3.a @@ -0,0 +1 @@ +8 7 5 3 1 6 4 2 0  diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample4 b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample4 new file mode 100644 index 0000000..bc4db06 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample4 @@ -0,0 +1 @@ +AACGATAGCGGTAGA$ diff --git a/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample4.a b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample4.a new file mode 100644 index 0000000..33f85b0 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/02-suffix_array_long/tests/sample4.a @@ -0,0 +1 @@ +15 14 0 1 12 6 4 2 8 13 3 7 9 10 11 5  diff --git a/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/suffix_array_matching.cpp b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/suffix_array_matching.cpp new file mode 100644 index 0000000..204218c --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/suffix_array_matching.cpp @@ -0,0 +1,51 @@ +#include <cstdio> +#include <iostream> +#include <string> +#include <vector> + +using std::cin; +using std::string; +using std::vector; + +vector<int> BuildSuffixArray(const string& text) { +  vector<int> order; + +  // write your code here + +  return order; +} + + +vector<int> FindOccurrences(const string& pattern, const string& text, const vector<int>& suffix_array) { +  vector<int> result; + +  // write your code here + +  return result; +} + +int main() { +  char buffer[100001]; +  scanf("%s", buffer); +  string text = buffer; +  text += '$'; +  vector<int> suffix_array = BuildSuffixArray(text); +  int pattern_count; +  scanf("%d", &pattern_count); +  vector<bool> occurs(text.length(), false); +  for (int pattern_index = 0; pattern_index < pattern_count; ++pattern_index) { +    scanf("%s", buffer); +    string pattern = buffer; +    vector<int> occurrences = FindOccurrences(pattern, text, suffix_array); +    for (int j = 0; j < occurrences.size(); ++j) { +      occurs[occurrences[j]] = true; +    } +  } +  for (int i = 0; i < occurs.size(); ++i) { +    if (occurs[i]) { +      printf("%d ", i); +    } +  } +  printf("\n"); +  return 0; +} diff --git a/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample1 b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample1 new file mode 100644 index 0000000..19bda23 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample1 @@ -0,0 +1,3 @@ +AAA +1 +A diff --git a/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample1.a b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample1.a new file mode 100644 index 0000000..66ecee2 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample1.a @@ -0,0 +1 @@ +0 1 2  diff --git a/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample2 b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample2 new file mode 100644 index 0000000..c5ce6bc --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample2 @@ -0,0 +1,3 @@ +ATA +3 +C G C diff --git a/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample2.a b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample2.a new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample2.a @@ -0,0 +1 @@ + diff --git a/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample3 b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample3 new file mode 100644 index 0000000..56913d4 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample3 @@ -0,0 +1,3 @@ +ATATATA +3 +ATA C TATAT diff --git a/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample3.a b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample3.a new file mode 100644 index 0000000..7860b9c --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/03-suffix_array_matching/tests/sample3.a @@ -0,0 +1 @@ +0 1 2 4  diff --git a/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/suffix_tree_from_array.cpp b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/suffix_tree_from_array.cpp new file mode 100644 index 0000000..ca9fdb5 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/suffix_tree_from_array.cpp @@ -0,0 +1,105 @@ +#include <algorithm> +#include <cstdio> +#include <map> +#include <string> +#include <utility> +#include <vector> + +using std::make_pair; +using std::map; +using std::pair; +using std::string; +using std::vector; + +// Data structure to store edges of a suffix tree. +struct Edge { +  // The ending node of this edge. +  int node; +  // Starting position of the substring of the text  +  // corresponding to the label of this edge. +  int start; +  // Position right after the end of the substring of the text  +  // corresponding to the label of this edge. +  int end; + +  Edge(int node_, int start_, int end_) : node(node_), start(start_), end(end_) {} +  Edge(const Edge& e) : node(e.node), start(e.start), end(e.end) {} +}; + +// Build suffix tree of the string text given its suffix array suffix_array +// and LCP array lcp_array. Return the tree as a mapping from a node ID +// to the vector of all outgoing edges of the corresponding node. The edges in the +// vector must be sorted in the ascending order by the first character of the edge label. +// Root must have node ID = 0, and all other node IDs must be different +// nonnegative integers. +// +// For example, if text = "ACACAA$", an edge with label "$" from root to a node with ID 1 +// must be represented by Edge(1, 6, 7). This edge must be present in the vector tree[0] +// (corresponding to the root node), and it should be the first edge in the vector  +// (because it has the smallest first character of all edges outgoing from the root). +map<int, vector<Edge> > SuffixTreeFromSuffixArray( +    const vector<int>& suffix_array, +    const vector<int>& lcp_array, +    const string& text) { +  map<int, vector<Edge> > tree; +  // Implement this function yourself +  return tree; +} + + +int main() { +  char buffer[200001]; +  scanf("%s", buffer); +  string text = buffer; +  vector<int> suffix_array(text.length()); +  for (int i = 0; i < text.length(); ++i) { +    scanf("%d", &suffix_array[i]); +  } +  vector<int> lcp_array(text.length() - 1); +  for (int i = 0; i + 1 < text.length(); ++i) { +    scanf("%d", &lcp_array[i]); +  } +  // Build the suffix tree and get a mapping from  +  // suffix tree node ID to the list of outgoing Edges. +  map<int, vector<Edge> > tree = SuffixTreeFromSuffixArray(suffix_array, lcp_array, text); +  printf("%s\n", buffer); +  // Output the edges of the suffix tree in the required order. +  // Note that we use here the contract that the root of the tree +  // will have node ID = 0 and that each vector of outgoing edges +  // will be sorted by the first character of the corresponding edge label. +  // +  // The following code avoids recursion to avoid stack overflow issues. +  // It uses a stack to convert recursive function to a while loop. +  // The stack stores pairs (node, edge_index).  +  // This code is an equivalent of  +  // +  //    OutputEdges(tree, 0); +  // +  // for the following _recursive_ function OutputEdges: +  // +  // void OutputEdges(map<int, vector<Edge> > tree, int node_id) { +  //   const vector<Edge>& edges = tree[node_id]; +  //   for (int edge_index = 0; edge_index < edges.size(); ++edge_index) { +  //     printf("%d %d\n", edges[edge_index].start, edges[edge_index].end); +  //     OutputEdges(tree, edges[edge_index].node); +  //   } +  // } +  // +  vector<pair<int, int> > stack(1, make_pair(0, 0));     +  while (!stack.empty()) { +    pair<int, int> p = stack.back(); +    stack.pop_back(); +    int node = p.first; +    int edge_index = p.second; +    if (!tree.count(node)) { +      continue; +    } +    const vector<Edge>& edges = tree[node]; +    if (edge_index + 1 < edges.size()) { +      stack.push_back(make_pair(node, edge_index + 1)); +    } +    printf("%d %d\n", edges[edge_index].start, edges[edge_index].end); +    stack.push_back(make_pair(edges[edge_index].node, 0)); +  } +  return 0; +} diff --git a/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample1 b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample1 new file mode 100644 index 0000000..1e5e8d8 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample1 @@ -0,0 +1,3 @@ +A$ +1 0 +0 diff --git a/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample1.a b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample1.a new file mode 100644 index 0000000..9f639b4 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample1.a @@ -0,0 +1,3 @@ +A$ +1 2 +0 2 diff --git a/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample2 b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample2 new file mode 100644 index 0000000..8be8bb4 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample2 @@ -0,0 +1,3 @@ +AAA$ +3 2 1 0 +0 1 2 diff --git a/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample2.a b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample2.a new file mode 100644 index 0000000..ff12930 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample2.a @@ -0,0 +1,7 @@ +AAA$ +3 4 +1 2 +3 4 +1 2 +3 4 +2 4 diff --git a/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample3 b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample3 new file mode 100644 index 0000000..84b1ce6 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample3 @@ -0,0 +1,3 @@ +GTAGT$ +5 2 3 0 4 1 +0 0 2 0 1 diff --git a/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample3.a b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample3.a new file mode 100644 index 0000000..b13e4a6 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/04-suffix_tree_from_array/tests/sample3.a @@ -0,0 +1,9 @@ +GTAGT$ +5 6 +2 6 +0 2 +5 6 +2 6 +1 2 +5 6 +2 6 diff --git a/04-algorithms_on_strings/04-suffix_array/check b/04-algorithms_on_strings/04-suffix_array/check new file mode 100755 index 0000000..b73ff99 --- /dev/null +++ b/04-algorithms_on_strings/04-suffix_array/check @@ -0,0 +1,38 @@ +#! /bin/bash + +RESET="\033[0m" +RED="\033[0;31m" +GREEN="\033[0;32m" +BROWN="\033[0;33m" + +BIN=/tmp/bin +OUTA=/tmp/_outa +OUTB=/tmp/_outb +GPP_OPTS="-std=c++11 -O2" + +for path in $(find -name \*.cpp | sort); do +    src=${path##*/} +    dir=${path%/*} +    echo -e "${RED}validate $BROWN$dir$RESET/$GREEN$src$RESET" +    pushd $dir >/dev/null || exit 1 +    echo -e "   ${RED}compile $GREEN$src$RESET" && g++ $GPP_OPTS $src -o $BIN || exit 1 +    if [ -d tests ]; then +        echo -e "   ${RED}check $GREEN$src$RESET" +        for t in $(find ./tests -name "*[^a~]"|sort); do +            if [ -f $t -a -f "$t.a" ]; then +                cat $t | $BIN > $OUTA +                cat $t.a > $OUTB +                cmp $OUTA $OUTB >/dev/null +                if [ $? -ne 0 ]; then +                    echo -e "     $BROWN$t$RESET is ${RED}KO$RESET" +                else +                    echo -e "     $BROWN$t$RESET is ${GREEN}ok$RESET" +                fi +            fi +        done +    else +        echo -e "   ${RED}no tests$RESET" +    fi +    popd > /dev/null +done + | 
