summaryrefslogtreecommitdiffstats
path: root/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching
diff options
context:
space:
mode:
Diffstat (limited to '04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching')
-rw-r--r--04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp108
1 files changed, 95 insertions, 13 deletions
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp
index 9353389..10551df 100644
--- a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp
+++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp
@@ -12,31 +12,113 @@ using std::map;
using std::string;
using std::vector;
+static bool debug = false;
+
// Preprocess the Burrows-Wheeler Transform bwt of some text
// and compute as a result:
-// * starts - for each character C in bwt, starts[C] is the first position
-// of this character in the sorted array of
+// * starts - for each character C in bwt, starts[C] is the first position
+// of this character in the sorted array of
// all characters of the text.
// * occ_count_before - for each character C in bwt and each position P in bwt,
// occ_count_before[C][P] is the number of occurrences of character C in bwt
// from position 0 to position P inclusive.
-void PreprocessBWT(const string& bwt,
- map<char, int>& starts,
- map<char, vector<int> >& occ_count_before) {
- // Implement this function yourself
+void PreprocessBWT(const string& bwt,
+ map<char, int>& starts,
+ map<char, vector<int> >& occ_count_before)
+{
+ starts.insert(std::pair<char,int>('A', -1));
+ starts.insert(std::pair<char,int>('C', -1));
+ starts.insert(std::pair<char,int>('G', -1));
+ starts.insert(std::pair<char,int>('T', -1));
+ occ_count_before.insert(std::pair<char,vector<int>>('A', vector<int>(bwt.size() + 1)));
+ occ_count_before.insert(std::pair<char,vector<int>>('C', vector<int>(bwt.size() + 1)));
+ occ_count_before.insert(std::pair<char,vector<int>>('G', vector<int>(bwt.size() + 1)));
+ occ_count_before.insert(std::pair<char,vector<int>>('T', vector<int>(bwt.size() + 1)));
+
+ int count[] = {0, 0, 0, 0, 0};
+ for (int i = 0; i < bwt.size(); i ++) {
+ occ_count_before['A'][i] = count[0];
+ occ_count_before['C'][i] = count[1];
+ occ_count_before['G'][i] = count[2];
+ occ_count_before['T'][i] = count[3];
+ switch(bwt[i]) {
+ case 'A':
+ count[0] += 1;
+ break;
+ case 'C':
+ count[1] += 1;
+ break;
+ case 'G':
+ count[2] += 1;
+ break;
+ case 'T':
+ count[3] += 1;
+ break;
+ default:
+ break;
+ }
+ }
+ occ_count_before['A'][bwt.size()] = count[0];
+ occ_count_before['C'][bwt.size()] = count[1];
+ occ_count_before['G'][bwt.size()] = count[2];
+ occ_count_before['T'][bwt.size()] = count[3];
+
+ int x = 1;
+ if (count[0] != 0) {
+ starts['A'] = x;
+ x += count[0];
+ }
+ if (count[1] != 0) {
+ starts['C'] = x;
+ x += count[1];
+ }
+ if (count[2] != 0) {
+ starts['G'] = x;
+ x += count[2];
+ }
+ if (count[3] != 0) {
+ starts['T'] = x;
+ x += count[3];
+ }
+
+ if (debug) {
+ std::cout << bwt << std::endl;
+ std::cout << "starts : " << std::endl;
+ for (std::map<char,int>::iterator it = starts.begin(); it != starts.end(); it++)
+ std::cout << it->first << " => " << it->second << std::endl;
+ for (std::map<char,vector<int>>::iterator it = occ_count_before.begin(); it != occ_count_before.end(); it++) {
+ std::cout << it->first << " => ";
+ for (int i = 0; i <= bwt.size(); i++)
+ std::cout << it->second[i] << " ";
+ std::cout << std::endl;
+ }
+ std::cout << std::endl;
+ }
}
// Compute the number of occurrences of string pattern in the text
// given only Burrows-Wheeler Transform bwt of the text and additional
// information we get from the preprocessing stage - starts and occ_counts_before.
-int CountOccurrences(const string& pattern,
- const string& bwt,
- const map<char, int>& starts,
- const map<char, vector<int> >& occ_count_before) {
- // Implement this function yourself
- return 0;
+int CountOccurrences(const string& pattern,
+ const string& bwt,
+ const map<char, int>& starts,
+ const map<char, vector<int> >& occ_count_before)
+{
+ for (int top = 0, bottom = bwt.size() - 1, i = pattern.size() - 1; top <= bottom; ) {
+ if (i >= 0) {
+ char c = pattern[i];
+ i--;
+ int s = starts.at(c);
+ const vector<int> &v = occ_count_before.at(c);
+ top = s + v[top];
+ bottom = s + v[bottom + 1] - 1;
+ } else
+ return bottom - top + 1;
+ }
+
+ return 0;
}
-
+
int main() {
string bwt;