summaryrefslogtreecommitdiffstats
path: root/04-algorithms_on_strings
diff options
context:
space:
mode:
authorJérémy Zurcher <jeremy@asynk.ch>2016-11-14 06:44:59 +0100
committerJérémy Zurcher <jeremy@asynk.ch>2016-11-14 06:47:46 +0100
commit0bc44a8bcf77a81677d8257eb5c4190ff5d5dd73 (patch)
tree132464805ee5c6778a423ea6ec2792e0a6c79f76 /04-algorithms_on_strings
parent26e4da9f87a7330d96bbeea0d47d875d3a96b1f8 (diff)
downloadcoursera-0bc44a8bcf77a81677d8257eb5c4190ff5d5dd73.zip
coursera-0bc44a8bcf77a81677d8257eb5c4190ff5d5dd73.tar.gz
Algorithms : add 04-algorithms_on_strings 02-burrows_wheeler
Diffstat (limited to '04-algorithms_on_strings')
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/00_burrows_wheeler.pdfbin0 -> 497428 bytes
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/01-bwt/bwt.cpp25
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample11
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample21
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample31
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/bwtinverse.cpp25
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample11
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample21
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp65
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample13
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample23
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample33
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/suffix_array.cpp35
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample11
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample21
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2.a1
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample31
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3.a1
-rwxr-xr-x04-algorithms_on_strings/02-burrows_wheeler/check38
28 files changed, 216 insertions, 0 deletions
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/00_burrows_wheeler.pdf b/04-algorithms_on_strings/02-burrows_wheeler/00_burrows_wheeler.pdf
new file mode 100644
index 0000000..0e40f9d
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/00_burrows_wheeler.pdf
Binary files differ
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/bwt.cpp b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/bwt.cpp
new file mode 100644
index 0000000..f3b0497
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/bwt.cpp
@@ -0,0 +1,25 @@
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+using std::cin;
+using std::cout;
+using std::endl;
+using std::string;
+using std::vector;
+
+string BWT(const string& text) {
+ string result = "";
+
+ // write your code here
+
+ return result;
+}
+
+int main() {
+ string text;
+ cin >> text;
+ cout << BWT(text) << endl;
+ return 0;
+} \ No newline at end of file
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1
new file mode 100644
index 0000000..0c6ef1d
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1
@@ -0,0 +1 @@
+AA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1.a
new file mode 100644
index 0000000..0c6ef1d
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample1.a
@@ -0,0 +1 @@
+AA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2
new file mode 100644
index 0000000..761dc40
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2
@@ -0,0 +1 @@
+ACACACAC$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2.a
new file mode 100644
index 0000000..96b7811
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample2.a
@@ -0,0 +1 @@
+CCCC$AAAA
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3 b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3
new file mode 100644
index 0000000..93070cb
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3
@@ -0,0 +1 @@
+AGACATA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3.a b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3.a
new file mode 100644
index 0000000..30ee3ea
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/01-bwt/tests/sample3.a
@@ -0,0 +1 @@
+ATG$CAAA
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/bwtinverse.cpp b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/bwtinverse.cpp
new file mode 100644
index 0000000..3ec7c54
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/bwtinverse.cpp
@@ -0,0 +1,25 @@
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+using std::cin;
+using std::cout;
+using std::endl;
+using std::string;
+using std::vector;
+
+string InverseBWT(const string& bwt) {
+ string text = "";
+
+ // write your code here
+
+ return text;
+}
+
+int main() {
+ string bwt;
+ cin >> bwt;
+ cout << InverseBWT(bwt) << endl;
+ return 0;
+}
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1
new file mode 100644
index 0000000..d2dd621
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1
@@ -0,0 +1 @@
+AC$A
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1.a
new file mode 100644
index 0000000..e73cda6
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample1.a
@@ -0,0 +1 @@
+ACA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2
new file mode 100644
index 0000000..29e4fc0
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2
@@ -0,0 +1 @@
+AGGGAA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2.a
new file mode 100644
index 0000000..ee229a2
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/02-bwtinverse/tests/sample2.a
@@ -0,0 +1 @@
+GAGAGA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp
new file mode 100644
index 0000000..9353389
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp
@@ -0,0 +1,65 @@
+#include <algorithm>
+#include <cstdio>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+using std::cin;
+using std::istringstream;
+using std::map;
+using std::string;
+using std::vector;
+
+// Preprocess the Burrows-Wheeler Transform bwt of some text
+// and compute as a result:
+// * starts - for each character C in bwt, starts[C] is the first position
+// of this character in the sorted array of
+// all characters of the text.
+// * occ_count_before - for each character C in bwt and each position P in bwt,
+// occ_count_before[C][P] is the number of occurrences of character C in bwt
+// from position 0 to position P inclusive.
+void PreprocessBWT(const string& bwt,
+ map<char, int>& starts,
+ map<char, vector<int> >& occ_count_before) {
+ // Implement this function yourself
+}
+
+// Compute the number of occurrences of string pattern in the text
+// given only Burrows-Wheeler Transform bwt of the text and additional
+// information we get from the preprocessing stage - starts and occ_counts_before.
+int CountOccurrences(const string& pattern,
+ const string& bwt,
+ const map<char, int>& starts,
+ const map<char, vector<int> >& occ_count_before) {
+ // Implement this function yourself
+ return 0;
+}
+
+
+int main() {
+ string bwt;
+ cin >> bwt;
+ int pattern_count;
+ cin >> pattern_count;
+ // Start of each character in the sorted list of characters of bwt,
+ // see the description in the comment about function PreprocessBWT
+ map<char, int> starts;
+ // Occurrence counts for each character and each position in bwt,
+ // see the description in the comment about function PreprocessBWT
+ map<char, vector<int> > occ_count_before;
+ // Preprocess the BWT once to get starts and occ_count_before.
+ // For each pattern, we will then use these precomputed values and
+ // spend only O(|pattern|) to find all occurrences of the pattern
+ // in the text instead of O(|pattern| + |text|).
+ PreprocessBWT(bwt, starts, occ_count_before);
+ for (int pi = 0; pi < pattern_count; ++pi) {
+ string pattern;
+ cin >> pattern;
+ int occ_count = CountOccurrences(pattern, bwt, starts, occ_count_before);
+ printf("%d ", occ_count);
+ }
+ printf("\n");
+ return 0;
+}
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1
new file mode 100644
index 0000000..4a99802
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1
@@ -0,0 +1,3 @@
+AGGGAA$
+1
+GA
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a
new file mode 100644
index 0000000..d855592
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a
@@ -0,0 +1 @@
+3
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2
new file mode 100644
index 0000000..82a1e71
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2
@@ -0,0 +1,3 @@
+ATT$AA
+2
+ATA A
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a
new file mode 100644
index 0000000..dcbc365
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a
@@ -0,0 +1 @@
+2 3
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3
new file mode 100644
index 0000000..884fa3c
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3
@@ -0,0 +1,3 @@
+AT$TCTATG
+2
+TCT TATG
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a
new file mode 100644
index 0000000..5d5d69c
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a
@@ -0,0 +1 @@
+0 0
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/suffix_array.cpp b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/suffix_array.cpp
new file mode 100644
index 0000000..1dbbcc3
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/suffix_array.cpp
@@ -0,0 +1,35 @@
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+#include <utility>
+
+using std::cin;
+using std::cout;
+using std::endl;
+using std::make_pair;
+using std::pair;
+using std::string;
+using std::vector;
+
+// Build suffix array of the string text and
+// return a vector result of the same length as the text
+// such that the value result[i] is the index (0-based)
+// in text where the i-th lexicographically smallest
+// suffix of text starts.
+vector<int> BuildSuffixArray(const string& text) {
+ vector<int> result;
+ // Implement this function yourself
+ return result;
+}
+
+int main() {
+ string text;
+ cin >> text;
+ vector<int> suffix_array = BuildSuffixArray(text);
+ for (int i = 0; i < suffix_array.size(); ++i) {
+ cout << suffix_array[i] << ' ';
+ }
+ cout << endl;
+ return 0;
+}
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1
new file mode 100644
index 0000000..b97cb09
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1
@@ -0,0 +1 @@
+GAC$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1.a
new file mode 100644
index 0000000..ee9295b
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample1.a
@@ -0,0 +1 @@
+3 1 2 0
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2
new file mode 100644
index 0000000..37abc16
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2
@@ -0,0 +1 @@
+GAGAGAGA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2.a
new file mode 100644
index 0000000..9e98c55
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample2.a
@@ -0,0 +1 @@
+8 7 5 3 1 6 4 2 0
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3 b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3
new file mode 100644
index 0000000..bc4db06
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3
@@ -0,0 +1 @@
+AACGATAGCGGTAGA$
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3.a b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3.a
new file mode 100644
index 0000000..33f85b0
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/04-suffix_array/tests/sample3.a
@@ -0,0 +1 @@
+15 14 0 1 12 6 4 2 8 13 3 7 9 10 11 5
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/check b/04-algorithms_on_strings/02-burrows_wheeler/check
new file mode 100755
index 0000000..b73ff99
--- /dev/null
+++ b/04-algorithms_on_strings/02-burrows_wheeler/check
@@ -0,0 +1,38 @@
+#! /bin/bash
+
+RESET="\033[0m"
+RED="\033[0;31m"
+GREEN="\033[0;32m"
+BROWN="\033[0;33m"
+
+BIN=/tmp/bin
+OUTA=/tmp/_outa
+OUTB=/tmp/_outb
+GPP_OPTS="-std=c++11 -O2"
+
+for path in $(find -name \*.cpp | sort); do
+ src=${path##*/}
+ dir=${path%/*}
+ echo -e "${RED}validate $BROWN$dir$RESET/$GREEN$src$RESET"
+ pushd $dir >/dev/null || exit 1
+ echo -e " ${RED}compile $GREEN$src$RESET" && g++ $GPP_OPTS $src -o $BIN || exit 1
+ if [ -d tests ]; then
+ echo -e " ${RED}check $GREEN$src$RESET"
+ for t in $(find ./tests -name "*[^a~]"|sort); do
+ if [ -f $t -a -f "$t.a" ]; then
+ cat $t | $BIN > $OUTA
+ cat $t.a > $OUTB
+ cmp $OUTA $OUTB >/dev/null
+ if [ $? -ne 0 ]; then
+ echo -e " $BROWN$t$RESET is ${RED}KO$RESET"
+ else
+ echo -e " $BROWN$t$RESET is ${GREEN}ok$RESET"
+ fi
+ fi
+ done
+ else
+ echo -e " ${RED}no tests$RESET"
+ fi
+ popd > /dev/null
+done
+