Knuth-Morris-Pratt string search
func kmp_search (String S, String W) {
S.graphemes!
W.graphemes!
func kmp_table {
var (pos, cnd, *T) = (1, 0, -1)
for (nil; pos < W.len; (++pos; ++cnd)) {
if (W[pos] == W[cnd]) {
T[pos] = T[cnd]
}
else {
T[pos] = cnd
while ((cnd >= 0) && (W[pos] != W[cnd])) {
cnd = T[cnd]
}
}
}
T[pos] = cnd
return T
}
var (k, T, *I) = (0, kmp_table())
for (var j = 0; j < S.len; nil) {
if (W[k] == S[j]) {
++j
++k
if (k == W.len) {
I << (j - k)
k = T[k]
}
}
elsif ((k = T[k]) < 0) {
++j
++k
}
}
return I
}
var texts = [
"GCTAGCTCTACGAGTCTA",
"GGCTATAATGCGTA",
"there would have been a time for such a word",
"needle need noodle needle",
"BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhārat"+
"amഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரத"+
"ம்BhāratamഭാരതംBhāratadēsamభారతదేశం",
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnu"+
"thusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblyl"+
"anguagestoillustratetheconceptsandalgorithmsastheyarepresented",
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with ba"+
"les of all that alfalfa exchanged for milk.",
]
var pats = ["TCTA", "TAATAAA", "word", "needle", "ഭാരതം", "put", "and", "alfalfa"]
texts.each_kv {|k,v| say "text#{k} = #{v}" }; say ''
pats.each_kv {|k,v|
var j = (k < 6 ? k : k-1)
say ("Found '#{v}' in 'text#{j}' at indices ", kmp_search(texts[j], v))
}
Output:
text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
text3 = needle need noodle needle
text4 = BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhāratamഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரதம்BhāratamഭാരതംBhāratadēsamభారతదేశం
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.
Found 'TCTA' in 'text0' at indices [6, 14]
Found 'TAATAAA' in 'text1' at indices []
Found 'word' in 'text2' at indices [40]
Found 'needle' in 'text3' at indices [0, 19]
Found 'ഭാരതം' in 'text4' at indices [68, 138]
Found 'put' in 'text5' at indices [26, 90]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'alfalfa' in 'text6' at indices [33, 87]
Last updated