Jaro-Winkler distance

This solution reuses the jaro() function from the Jaro similarity task; that function must be defined before the code below is run.

# Jaro-Winkler distance between the strings `s` and `t`.
#
# The result is 1 - (jaro + prefix * 0.1 * (1 - jaro)), where `jaro` is the
# value returned by jaro(s, t) (defined elsewhere) and `prefix` is the number
# of leading positions at which `s` and `t` hold the same character, counted
# over at most the first four positions.
func jaro_winkler_distance(s,t) {

    var jaro_similarity = jaro(s, t)

    # Last position to inspect: index 3 at most, and never beyond the end of
    # the shorter string. For an empty string this is -1, which makes the
    # range below empty, so no out-of-range character is ever compared.
    var last = (min(4, s.len, t.len) - 1)

    var prefix = 0
    for i in (0 .. last) {
        s.char(i) == t.char(i) ? ++prefix : break
    }

    1 - (prefix * 0.1 * (1 - jaro_similarity) + jaro_similarity)
}

# Usage:
#    sidef script.sf < unixdict.txt
#
# The dictionary is read from ARGF and split into words. For each misspelled
# word below, the five dictionary entries with the smallest Jaro-Winkler
# distance (restricted to distances below .15) are printed.

var dictionary = ARGF.slurp.words

%w(accomodate definately goverment occured publically recieve seperate untill wich).each {|word|

    # Map every dictionary entry to its distance from the misspelled word.
    var distances = Hash(dictionary.map {|entry| (entry, jaro_winkler_distance(word, entry)) }...)

    say "\nClosest 5 dictionary words with a Jaro-Winkler distance < .15 from #{word}:"

    # Keep only the near matches, order them by ascending distance, take five.
    var nearest = distances.grep {|_,dist| dist < .15 }.sort_by {|_,dist| dist }.head(5)

    nearest.each_2d {|entry, dist|
        printf("%15s : %0.4f\n", entry, dist)
    }
}

Output:

Last updated

Was this helpful?