Most frequent k chars distance

# Builds the "most frequent K" table for a string.
#
# Parameters:
#   string - text whose characters are counted
#   k      - number of entries the table must contain
#
# Returns an array of k hashes, each with the keys "c" (a character) and
# "f" (its occurrence count), ordered by descending count. Characters with
# equal counts are listed in order of first appearance in the string. When
# the string has fewer than k distinct characters, the remaining slots hold
# the placeholder entry c => "NULL", f => 0.
func _MostFreqKHashing(string, k) {

    var letters = string.chars
    var counts  = letters.freq

    # Distinct characters, highest count first. Only the count found at each
    # rank is consulted below; the character itself is chosen by scanning
    # the string in its original order.
    var ranked = counts.keys.sort_by {|ch| -counts{ch} }

    var taken   = Hash()
    var entries = []

    for rank in ^k {
        for ch in letters {
            if (taken{ch}) { next }
            if (counts{ch} != counts{ranked[rank]}) { next }

            # First not-yet-used character carrying this rank's count
            taken{ch} = true
            entries << Hash(c => ch, f => counts{ch})
            break
        }
    }

    # Pad with placeholders so the result always holds k entries
    var padding = (k - taken.len).of { Hash(c => :NULL, f => 0) }
    return (entries + padding)
}

# Most-frequent-K string distance between two strings.
#
# Parameters:
#   a, b - the strings to compare
#   k    - number of most frequent characters taken from each string
#   d    - value from which the computed similarity is subtracted
#
# For every non-placeholder character present in both tables, the similarity
# grows by the count from `a`, plus the count from `b` when the two counts
# differ. Returns d minus that similarity.
func MostFreqKSDF(a, b, k, d) {

    var left  = _MostFreqKHashing(a, k)
    var right = _MostFreqKHashing(b, k)

    var similarity = 0

    for x in left {
        # Placeholder slots never contribute
        if (x{:c} == :NULL) { next }

        for y in right {
            if (x{:c} != y{:c}) { next }

            similarity += x{:f}
            if (x{:f} != y{:f}) {
                similarity += y{:f}
            }
        }
    }

    return (d - similarity)
}

# Renders the most-frequent-K table of `string` as a single string.
#
# Each of the k table entries is formatted as its character followed by its
# count (for example "L9"), and the pieces are concatenated in table order.
func MostFreqKHashing(string, k) {
    _MostFreqKHashing(string, k).map { |entry|
        "%s%d" % (entry{:c}, entry{:f})
    }.join
}


# Demonstration 1: two long sample sequences, compared with k = 2 and
# 100 as the value the similarity is subtracted from.
var str1 = "LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV"
var str2 = "EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG"

say("str1 = #{str1.dump}")
say("str2 = #{str2.dump}")

say('')

say("MostFreqKHashing(str1, 2) = ", MostFreqKHashing(str1, 2))
say("MostFreqKHashing(str2, 2) = ", MostFreqKHashing(str2, 2))
say("MostFreqKSDF(str1, str2, 2, 100) = ", MostFreqKSDF(str1, str2, 2, 100))

say('')

# Demonstration 2: short word pairs, each reported as both hashes plus the
# distance computed with the shared k and limit below.
var arr = [
    ["night", "nacht"],
    ["my", "a"],
    ["research", "research"],
    ["aaaaabbbb", "ababababa"],
    ["significant", "capabilities"],
]

var k = 2
var limit = 10

arr.each { |pair|
    var s = pair[0]
    var t = pair[1]

    printf("mfkh(%s, %s, #{k}) = [%s, %s] (SDF: %d)\n",
        s.dump, t.dump,
        MostFreqKHashing(s, k).dump,
        MostFreqKHashing(t, k).dump,
        MostFreqKSDF(s, t, k, limit)
    )
}

Output:

str1 = "LCLYTHIGRNIYYGSYLYSETWNTGIMLLLITMATAFMGYVLPWGQMSFWGATVITNLFSAIPYIGTNLV"
str2 = "EWIWGGFSVDKATLNRFFAFHFILPFTMVALAGVHLTFLHETGSNNPLGLTSDSDKIPFHPYYTIKDFLG"

MostFreqKHashing(str1, 2) = L9T8
MostFreqKHashing(str2, 2) = F9L8
MostFreqKSDF(str1, str2, 2, 100) = 83

mfkh("night", "nacht", 2) = ["n1i1", "n1a1"] (SDF: 9)
mfkh("my", "a", 2) = ["m1y1", "a1NULL0"] (SDF: 10)
mfkh("research", "research", 2) = ["r2e2", "r2e2"] (SDF: 6)
mfkh("aaaaabbbb", "ababababa", 2) = ["a5b4", "a5b4"] (SDF: 1)
mfkh("significant", "capabilities", 2) = ["i3n2", "i3a2"] (SDF: 7)

Last updated