Knuth-Morris-Pratt string search

Based on pseudocode found in WP (kmp_search & kmp_table). Data and output format mostly follows the Wren entry.

# 20220810 Raku programming solution

sub kmp_search (@S where *.Bool, @W where *.Bool) {

   sub kmp_table (@W where *.Bool) {
      loop (my ($pos,$cnd,@T) = 1,0,-1 ; $pos < @W.elems ; ($pos, $cnd)>>++) {
         if @W[$pos] eq @W[$cnd] {
            @T[$pos]  = @T[$cnd]
         } else {
            @T[$pos]  = $cnd;
            while $cnd ≥ 0 and @W[$pos] ne @W[$cnd] { $cnd = @T[$cnd] }
         }
      }
      @T[$pos] = $cnd;
      return @T
   }

   return gather loop (my ($j,$k,@T) = 0,0, |kmp_table @W; $j < @S.elems; ) { 
      if @W[$k] eq @S[$j] { 
         ($j, $k)>>++;   
         if $k == @W.elems { 
	    take $j - $k;
            $k = @T[$k]  
         } 
      } else {
         ($j, $k)>>++ if ($k = @T[$k]) < 0    
      }
   }
}

my @texts = [
   "GCTAGCTCTACGAGTCTA",
   "GGCTATAATGCGTA",
   "there would have been a time for such a word",
   "needle need noodle needle",
   "BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhāratamഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரதம்BhāratamഭാരതംBhāratadēsamభారతదేశం",
   "InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented",
   "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.",
];

my @pats = ["TCTA", "TAATAAA", "word", "needle", "ഭാരതം", "put", "and", "alfalfa"];

say "text$_ = @texts[$_]" for @texts.keys ; 
say();

for @pats.keys {
   my \j = $_ < 6 ?? $_ !! $_-1 ; # for searching text5 twice
   say "Found '@pats[$_]' in 'text{j}' at indices ", kmp_search @texts[j].comb, @pats[$_].comb
}

Output:

Last updated

Was this helpful?