diff options
| author | Jérémy Zurcher <jeremy@asynk.ch> | 2016-11-14 06:44:59 +0100 | 
|---|---|---|
| committer | Jérémy Zurcher <jeremy@asynk.ch> | 2016-11-14 06:47:46 +0100 | 
| commit | 0bc44a8bcf77a81677d8257eb5c4190ff5d5dd73 (patch) | |
| tree | 132464805ee5c6778a423ea6ec2792e0a6c79f76 /04-algorithms_on_strings | |
| parent | 26e4da9f87a7330d96bbeea0d47d875d3a96b1f8 (diff) | |
| download | coursera-0bc44a8bcf77a81677d8257eb5c4190ff5d5dd73.zip coursera-0bc44a8bcf77a81677d8257eb5c4190ff5d5dd73.tar.gz  | |
Algorithms : add 04-algorithms_on_strings 02-burrows_wheeler
Diffstat (limited to '04-algorithms_on_strings')
28 files changed, 216 insertions, 0 deletions
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/00_burrows_wheeler.pdf b/04-algorithms_on_strings/02-burrows_wheeler/00_burrows_wheeler.pdf Binary files differnew file mode 100644 index 0000000..0e40f9d --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/00_burrows_wheeler.pdf diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/bwt.cpp b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/bwt.cpp new file mode 100644 index 0000000..f3b0497 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/bwt.cpp @@ -0,0 +1,25 @@ +#include <algorithm> +#include <iostream> +#include <string> +#include <vector> + +using std::cin; +using std::cout; +using std::endl; +using std::string; +using std::vector; + +string BWT(const string& text) { +  string result = ""; + +  // write your code here + +  return result; +} + +int main() { +  string text; +  cin >> text; +  cout << BWT(text) << endl; +  return 0; +}
\ No newline at end of file diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1 new file mode 100644 index 0000000..0c6ef1d --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1 @@ -0,0 +1 @@ +AA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1.a new file mode 100644 index 0000000..0c6ef1d --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1.a @@ -0,0 +1 @@ +AA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2 new file mode 100644 index 0000000..761dc40 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2 @@ -0,0 +1 @@ +ACACACAC$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2.a new file mode 100644 index 0000000..96b7811 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2.a @@ -0,0 +1 @@ +CCCC$AAAA diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3 b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3 new file mode 100644 index 0000000..93070cb --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3 @@ -0,0 +1 @@ +AGACATA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3.a b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3.a new file mode 100644 index 0000000..30ee3ea --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3.a @@ -0,0 +1 @@ +ATG$CAAA diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/bwtinverse.cpp b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/bwtinverse.cpp new file mode 100644 index 0000000..3ec7c54 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/bwtinverse.cpp @@ -0,0 +1,25 @@ +#include <algorithm> +#include <iostream> +#include <string> +#include <vector> + +using std::cin; +using std::cout; +using std::endl; +using std::string; +using std::vector; + +string InverseBWT(const string& bwt) { +  string text = ""; + +  // write your code here + +  return text; +} + +int main() { +  string bwt; +  cin >> bwt; +  cout << InverseBWT(bwt) << endl; +  return 0; +} diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1 new file mode 100644 index 0000000..d2dd621 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1 @@ -0,0 +1 @@ +AC$A diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1.a new file mode 100644 index 0000000..e73cda6 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1.a @@ -0,0 +1 @@ +ACA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2 new file mode 100644 index 0000000..29e4fc0 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2 @@ -0,0 +1 @@ +AGGGAA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2.a new file mode 100644 index 0000000..ee229a2 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2.a @@ -0,0 +1 @@ +GAGAGA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp new file mode 100644 index 0000000..9353389 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp @@ -0,0 +1,65 @@ +#include <algorithm> +#include <cstdio> +#include <iostream> +#include <map> +#include <sstream> +#include <string> +#include <vector> + +using std::cin; +using std::istringstream; +using std::map; +using std::string; +using std::vector; + +// Preprocess the Burrows-Wheeler Transform bwt of some text +// and compute as a result: +//   * starts - for each character C in bwt, starts[C] is the first position  +//       of this character in the sorted array of  +//       all characters of the text. +//   * occ_count_before - for each character C in bwt and each position P in bwt, +//       occ_count_before[C][P] is the number of occurrences of character C in bwt +//       from position 0 to position P inclusive. +void PreprocessBWT(const string& bwt,  +                   map<char, int>& starts,  +                   map<char, vector<int> >& occ_count_before) { +  // Implement this function yourself +} + +// Compute the number of occurrences of string pattern in the text +// given only Burrows-Wheeler Transform bwt of the text and additional +// information we get from the preprocessing stage - starts and occ_counts_before. +int CountOccurrences(const string& pattern,  +                     const string& bwt,  +                     const map<char, int>& starts,  +                     const map<char, vector<int> >& occ_count_before) { +  // Implement this function yourself +  return 0; +} +      + +int main() { +  string bwt; +  cin >> bwt; +  int pattern_count; +  cin >> pattern_count; +  // Start of each character in the sorted list of characters of bwt, +  // see the description in the comment about function PreprocessBWT +  map<char, int> starts; +  // Occurrence counts for each character and each position in bwt, +  // see the description in the comment about function PreprocessBWT +  map<char, vector<int> > occ_count_before; +  // Preprocess the BWT once to get starts and occ_count_before. +  // For each pattern, we will then use these precomputed values and +  // spend only O(|pattern|) to find all occurrences of the pattern +  // in the text instead of O(|pattern| + |text|). +  PreprocessBWT(bwt, starts, occ_count_before); +  for (int pi = 0; pi < pattern_count; ++pi) { +    string pattern; +    cin >> pattern; +    int occ_count = CountOccurrences(pattern, bwt, starts, occ_count_before); +    printf("%d ", occ_count); +  } +  printf("\n"); +  return 0; +} diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1 new file mode 100644 index 0000000..4a99802 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1 @@ -0,0 +1,3 @@ +AGGGAA$ +1 +GA diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a new file mode 100644 index 0000000..d855592 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a @@ -0,0 +1 @@ +3  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2 new file mode 100644 index 0000000..82a1e71 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2 @@ -0,0 +1,3 @@ +ATT$AA +2 +ATA A diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a new file mode 100644 index 0000000..dcbc365 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a @@ -0,0 +1 @@ +2 3  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3 new file mode 100644 index 0000000..884fa3c --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3 @@ -0,0 +1,3 @@ +AT$TCTATG +2 +TCT TATG diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a new file mode 100644 index 0000000..5d5d69c --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a @@ -0,0 +1 @@ +0 0  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/suffix_array.cpp b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/suffix_array.cpp new file mode 100644 index 0000000..1dbbcc3 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/suffix_array.cpp @@ -0,0 +1,35 @@ +#include <algorithm> +#include <iostream> +#include <string> +#include <vector> +#include <utility> + +using std::cin; +using std::cout; +using std::endl; +using std::make_pair; +using std::pair; +using std::string; +using std::vector; + +// Build suffix array of the string text and +// return a vector result of the same length as the text +// such that the value result[i] is the index (0-based) +// in text where the i-th lexicographically smallest +// suffix of text starts. +vector<int> BuildSuffixArray(const string& text) { +  vector<int> result; +  // Implement this function yourself +  return result; +} + +int main() { +  string text; +  cin >> text; +  vector<int> suffix_array = BuildSuffixArray(text); +  for (int i = 0; i < suffix_array.size(); ++i) { +    cout << suffix_array[i] << ' '; +  } +  cout << endl; +  return 0; +} diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1 new file mode 100644 index 0000000..b97cb09 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1 @@ -0,0 +1 @@ +GAC$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1.a new file mode 100644 index 0000000..ee9295b --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1.a @@ -0,0 +1 @@ +3 1 2 0  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2 new file mode 100644 index 0000000..37abc16 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2 @@ -0,0 +1 @@ +GAGAGAGA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2.a new file mode 100644 index 0000000..9e98c55 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2.a @@ -0,0 +1 @@ +8 7 5 3 1 6 4 2 0  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3 b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3 new file mode 100644 index 0000000..bc4db06 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3 @@ -0,0 +1 @@ +AACGATAGCGGTAGA$ diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3.a b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3.a new file mode 100644 index 0000000..33f85b0 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3.a @@ -0,0 +1 @@ +15 14 0 1 12 6 4 2 8 13 3 7 9 10 11 5  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/check b/04-algorithms_on_strings/02-burrows_wheeler/check new file mode 100755 index 0000000..b73ff99 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/check @@ -0,0 +1,38 @@ +#! /bin/bash + +RESET="\033[0m" +RED="\033[0;31m" +GREEN="\033[0;32m" +BROWN="\033[0;33m" + +BIN=/tmp/bin +OUTA=/tmp/_outa +OUTB=/tmp/_outb +GPP_OPTS="-std=c++11 -O2" + +for path in $(find -name \*.cpp | sort); do +    src=${path##*/} +    dir=${path%/*} +    echo -e "${RED}validate $BROWN$dir$RESET/$GREEN$src$RESET" +    pushd $dir >/dev/null || exit 1 +    echo -e "   ${RED}compile $GREEN$src$RESET" && g++ $GPP_OPTS $src -o $BIN || exit 1 +    if [ -d tests ]; then +        echo -e "   ${RED}check $GREEN$src$RESET" +        for t in $(find ./tests -name "*[^a~]"|sort); do +            if [ -f $t -a -f "$t.a" ]; then +                cat $t | $BIN > $OUTA +                cat $t.a > $OUTB +                cmp $OUTA $OUTB >/dev/null +                if [ $? -ne 0 ]; then +                    echo -e "     $BROWN$t$RESET is ${RED}KO$RESET" +                else +                    echo -e "     $BROWN$t$RESET is ${GREEN}ok$RESET" +                fi +            fi +        done +    else +        echo -e "   ${RED}no tests$RESET" +    fi +    popd > /dev/null +done +  | 
