diff options
Diffstat (limited to '04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching')
-rw-r--r-- | 04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp | 108 |
1 files changed, 95 insertions, 13 deletions
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp index 9353389..10551df 100644 --- a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp @@ -12,31 +12,113 @@ using std::map; using std::string; using std::vector; +static bool debug = false; + // Preprocess the Burrows-Wheeler Transform bwt of some text // and compute as a result: -// * starts - for each character C in bwt, starts[C] is the first position -// of this character in the sorted array of +// * starts - for each character C in bwt, starts[C] is the first position +// of this character in the sorted array of // all characters of the text. // * occ_count_before - for each character C in bwt and each position P in bwt, // occ_count_before[C][P] is the number of occurrences of character C in bwt // from position 0 to position P inclusive. -void PreprocessBWT(const string& bwt, - map<char, int>& starts, - map<char, vector<int> >& occ_count_before) { - // Implement this function yourself +void PreprocessBWT(const string& bwt, + map<char, int>& starts, + map<char, vector<int> >& occ_count_before) +{ + starts.insert(std::pair<char,int>('A', -1)); + starts.insert(std::pair<char,int>('C', -1)); + starts.insert(std::pair<char,int>('G', -1)); + starts.insert(std::pair<char,int>('T', -1)); + occ_count_before.insert(std::pair<char,vector<int>>('A', vector<int>(bwt.size() + 1))); + occ_count_before.insert(std::pair<char,vector<int>>('C', vector<int>(bwt.size() + 1))); + occ_count_before.insert(std::pair<char,vector<int>>('G', vector<int>(bwt.size() + 1))); + occ_count_before.insert(std::pair<char,vector<int>>('T', vector<int>(bwt.size() + 1))); + + int count[] = {0, 0, 0, 0, 0}; + for (int i = 0; i < bwt.size(); i ++) { + occ_count_before['A'][i] = count[0]; + occ_count_before['C'][i] = count[1]; + occ_count_before['G'][i] = count[2]; + occ_count_before['T'][i] = count[3]; + switch(bwt[i]) { + case 'A': + count[0] += 1; + break; + case 'C': + count[1] += 1; + break; + case 'G': + count[2] += 1; + break; + case 'T': + count[3] += 1; + break; + default: + break; + } + } + occ_count_before['A'][bwt.size()] = count[0]; + occ_count_before['C'][bwt.size()] = count[1]; + occ_count_before['G'][bwt.size()] = count[2]; + occ_count_before['T'][bwt.size()] = count[3]; + + int x = 1; + if (count[0] != 0) { + starts['A'] = x; + x += count[0]; + } + if (count[1] != 0) { + starts['C'] = x; + x += count[1]; + } + if (count[2] != 0) { + starts['G'] = x; + x += count[2]; + } + if (count[3] != 0) { + starts['T'] = x; + x += count[3]; + } + + if (debug) { + std::cout << bwt << std::endl; + std::cout << "starts : " << std::endl; + for (std::map<char,int>::iterator it = starts.begin(); it != starts.end(); it++) + std::cout << it->first << " => " << it->second << std::endl; + for (std::map<char,vector<int>>::iterator it = occ_count_before.begin(); it != occ_count_before.end(); it++) { + std::cout << it->first << " => "; + for (int i = 0; i <= bwt.size(); i++) + std::cout << it->second[i] << " "; + std::cout << std::endl; + } + std::cout << std::endl; + } } // Compute the number of occurrences of string pattern in the text // given only Burrows-Wheeler Transform bwt of the text and additional // information we get from the preprocessing stage - starts and occ_counts_before. -int CountOccurrences(const string& pattern, - const string& bwt, - const map<char, int>& starts, - const map<char, vector<int> >& occ_count_before) { - // Implement this function yourself - return 0; +int CountOccurrences(const string& pattern, + const string& bwt, + const map<char, int>& starts, + const map<char, vector<int> >& occ_count_before) +{ + for (int top = 0, bottom = bwt.size() - 1, i = pattern.size() - 1; top <= bottom; ) { + if (i >= 0) { + char c = pattern[i]; + i--; + int s = starts.at(c); + const vector<int> &v = occ_count_before.at(c); + top = s + v[top]; + bottom = s + v[bottom + 1] - 1; + } else + return bottom - top + 1; + } + + return 0; } - + int main() { string bwt; |