diff options
Diffstat (limited to '04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching')
7 files changed, 77 insertions, 0 deletions
diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp new file mode 100644 index 0000000..9353389 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/bwmatching.cpp @@ -0,0 +1,65 @@ +#include <algorithm> +#include <cstdio> +#include <iostream> +#include <map> +#include <sstream> +#include <string> +#include <vector> + +using std::cin; +using std::istringstream; +using std::map; +using std::string; +using std::vector; + +// Preprocess the Burrows-Wheeler Transform bwt of some text +// and compute as a result: +//   * starts - for each character C in bwt, starts[C] is the first position  +//       of this character in the sorted array of  +//       all characters of the text. +//   * occ_count_before - for each character C in bwt and each position P in bwt, +//       occ_count_before[C][P] is the number of occurrences of character C in bwt +//       from position 0 to position P inclusive. +void PreprocessBWT(const string& bwt,  +                   map<char, int>& starts,  +                   map<char, vector<int> >& occ_count_before) { +  // Implement this function yourself +} + +// Compute the number of occurrences of string pattern in the text +// given only Burrows-Wheeler Transform bwt of the text and additional +// information we get from the preprocessing stage - starts and occ_counts_before. +int CountOccurrences(const string& pattern,  +                     const string& bwt,  +                     const map<char, int>& starts,  +                     const map<char, vector<int> >& occ_count_before) { +  // Implement this function yourself +  return 0; +} +      + +int main() { +  string bwt; +  cin >> bwt; +  int pattern_count; +  cin >> pattern_count; +  // Start of each character in the sorted list of characters of bwt, +  // see the description in the comment about function PreprocessBWT +  map<char, int> starts; +  // Occurrence counts for each character and each position in bwt, +  // see the description in the comment about function PreprocessBWT +  map<char, vector<int> > occ_count_before; +  // Preprocess the BWT once to get starts and occ_count_before. +  // For each pattern, we will then use these precomputed values and +  // spend only O(|pattern|) to find all occurrences of the pattern +  // in the text instead of O(|pattern| + |text|). +  PreprocessBWT(bwt, starts, occ_count_before); +  for (int pi = 0; pi < pattern_count; ++pi) { +    string pattern; +    cin >> pattern; +    int occ_count = CountOccurrences(pattern, bwt, starts, occ_count_before); +    printf("%d ", occ_count); +  } +  printf("\n"); +  return 0; +} diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1 new file mode 100644 index 0000000..4a99802 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1 @@ -0,0 +1,3 @@ +AGGGAA$ +1 +GA diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a new file mode 100644 index 0000000..d855592 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample1.a @@ -0,0 +1 @@ +3  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2 new file mode 100644 index 0000000..82a1e71 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2 @@ -0,0 +1,3 @@ +ATT$AA +2 +ATA A diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a new file mode 100644 index 0000000..dcbc365 --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample2.a @@ -0,0 +1 @@ +2 3  diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3 b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3 new file mode 100644 index 0000000..884fa3c --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3 @@ -0,0 +1,3 @@ +AT$TCTATG +2 +TCT TATG diff --git a/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a new file mode 100644 index 0000000..5d5d69c --- /dev/null +++ b/04-algorithms_on_strings/02-burrows_wheeler/03-bwmatching/tests/sample3.a @@ -0,0 +1 @@ +0 0   | 
