Boyer-Moore string search

# 20220818 Raku programming solution

sub suffixes (@pat,\m) {
   loop (my ($i,$f,$g,@suff)=m-2, 0, m-1, |flat 0 xx m-1,m; $i >= 0; --$i) {
      if $i > $g and @suff[$i + m - 1 - $f] < $i - $g {
         @suff[$i] = @suff[$i + m - 1 - $f]
      } else {
	 $g = $i if $i < $g;
         $f = $i;
	 while $g >= 0 and @pat[$g] eq @pat[$g + m - 1 - $f] { $g-- }
         @suff[$i] = $f - $g
      }
   }
   return @suff;
}

sub preBmGs (@pat,\m) {
   my (@suff, @bmGs) := suffixes(@pat,m), [m xx m]; 

   for m-1 ... 0 -> \k {
      if @suff[k] == k + 1 {
         loop (my $j=0; $j < m-1-k; $j++) { @bmGs[k]=m-1-k if @bmGs[$j] == m }
      }
   }
   for ^(m-1) { @bmGs[m - 1 - @suff[$_]] = m - 1 - $_ }
   return @bmGs;
}

sub BM (@txt,@pat) {
   my (\m, \n, $j)    = +@pat, +@txt, 0;
   my (@bmGs, %bmBc) := preBmGs(@pat,m), hash @pat Z=> ( m-1 ... 1 );

   return gather while $j <= n - m {
      loop (my $i = m - 1; $i >= 0 and @pat[$i] eq @txt[$i + $j]; ) { $i-- }
      if $i < 0 {
	 take $j;
         $j += @bmGs[0]
      } else {
         $j += max @bmGs[$i], (%bmBc{@txt[$i + $j]} // m)-m+$i
      }
   }
}

my @texts = [
   "GCTAGCTCTACGAGTCTA",
   "GGCTATAATGCGTA",
   "there would have been a time for such a word",
   "needle need noodle needle",
   "BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhāratamഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரதம்BhāratamഭാരതംBhāratadēsamభారతదేశం",
   "InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented",
   "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.",
];

my @pats = [ "TCTA", "TAATAAA", "word", "needle", "ഭാരതം", "put", "and", "alfalfa"];

say "text$_ = @texts[$_]" for @texts.keys ;
say();

for @pats.keys {
   my \j = $_ < 6 ?? $_ !! $_-1 ; # for searching text5 twice
   say "Found '@pats[$_]' in 'text{j}' at indices ", BM @texts[j].comb, @pats[$_].comb
}

Output is the same as the Knuth-Morris-Pratt_string_search#Raku entry.

Last updated