Jaro-Winkler distance
This solution reuses the jaro() function from the Jaro similarity task; it must be defined before the code below.
# Jaro-Winkler distance between the strings `s` and `t`.
#
# Takes the Jaro similarity of the two strings (computed by jaro(), which is
# defined elsewhere), boosts it according to the length of their common
# prefix (at most 4 characters, scaling factor 0.1), and returns
# 1 - boosted similarity, so smaller values mean "more alike".
func jaro_winkler_distance(s,t) {

    var jaro_similarity = jaro(s, t)

    # Index of the last position eligible for the prefix comparison: at most
    # 4 characters are inspected, and never more than the shorter string
    # holds, so no out-of-range character is ever read. An empty string
    # gives last = -1, leaving nothing to compare.
    var last = (min(4, s.len, t.len) - 1)

    # Count leading characters shared by both strings, stopping at the first
    # mismatch.
    var prefix = 0
    for i in (0 .. last) {
        s.char(i) == t.char(i) ? ++prefix : break
    }

    # Winkler adjustment: sim_w = sim_j + prefix * 0.1 * (1 - sim_j);
    # the distance is its complement.
    1 - (prefix * 0.1 * (1 - jaro_similarity) + jaro_similarity)
}
# Usage:
#   sidef script.sf < unixdict.txt
#
# Reads a word list from the input, then, for each misspelled word below,
# prints up to five dictionary words whose Jaro-Winkler distance to it is
# below .15, closest first.

# All whitespace-separated words from the input form the dictionary.
var words = ARGF.slurp.words

%w(accomodate definately goverment occured publically recieve seperate untill wich).each {|word|

    # Map every dictionary word to its distance from the misspelled word.
    var result = Hash(words.map { (_, jaro_winkler_distance(word, _)) }...)

    say "\nClosest 5 dictionary words with a Jaro-Winkler distance < .15 from #{word}:"

    # Keep only close matches, order them by ascending distance, and show
    # the first five as "word : distance".
    result.grep {|_,v| v < .15 }.sort_by{|_,v| v }.head(5).each_2d {|k,v|
        printf("%15s : %0.4f\n", k, v)
    }
}
Last updated
Was this helpful?