# File lib/perseus_match/token_set.rb, line 259
#
# Returns the memoized instance for +form+, building it on first use.
#
# +form+ is scanned with PRINTABLE_CHAR_RE and every match is handed to
# TokenSet.tokenize together with the +unknowns+ flag; the resulting
# token sets, along with +form+ itself, are passed to +new+.
#
# form::     the string to tokenize (must respond to +scan+).
# unknowns:: forwarded unchanged to TokenSet.tokenize (default +false+).
#
# Results are cached per +form+ *and* per +unknowns+ mode. A single cache
# keyed by +form+ alone would return whichever variant happened to be built
# first, regardless of the +unknowns+ value requested later.
def tokenize(form, unknowns = false)
  # @tokens keeps serving the default mode exactly as before; the truthy
  # +unknowns+ mode gets its own cache so the two never shadow each other.
  cache = unknowns ? (@tokens_with_unknowns ||= {}) : (@tokens ||= {})

  cache[form] ||= new(
    form,
    form.scan(PRINTABLE_CHAR_RE).map { |part| TokenSet.tokenize(part, unknowns) }
  )
end
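# Bounds on the value returned by +distance+ (size1 and size2 being the
# number of token sets on each side):
#
#   (size1 - size2).abs <= distance <= [size1, size2].max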
# File lib/perseus_match/token_set.rb, line 271
#
# Computes an edit distance between this object's +token_sets+ and those of
# +other+, using a single cost row that is updated in place.
#
# Insertions and deletions cost 1; substituting one token set for another
# costs whatever <tt>token_set.distance(other_token_set)</tt> returns.
# NOTE(review): the range of that per-pair penalty is not visible here --
# presumably 0..1; confirm against the token set's own +distance+.
#
# other:: an object responding to +token_sets+.
#
# Returns the size of the other side when either side is empty, otherwise
# the minimal accumulated cost.
def distance(other)
  ours, theirs = token_sets, other.token_sets
  our_count, their_count = ours.size, theirs.size

  # Against an empty side, everything on the non-empty side is inserted.
  return their_count if our_count.zero?
  return our_count if their_count.zero?

  best = nil

  # row[j] holds the cost of turning the first i of our token sets into the
  # first j of theirs; it starts out as the row for i == 0.
  row = (0..their_count).to_a

  our_count.times do |i|
    ours_item = ours[i]

    # Cost of the cell to the left in the row being built (column 0: i + 1).
    left = i + 1

    their_count.times do |j|
      penalty = ours_item.distance(theirs[j])

      best = [
        row[j + 1] + 1,   # insertion
        left + 1,         # deletion
        row[j] + penalty  # substitution
      ].min

      # Store the finished left cell before moving on; row[j] is no longer
      # needed as the "previous row" value after this point.
      row[j] = left
      left = best
    end

    row[their_count] = best
  end

  best
end
# File lib/perseus_match/token_set.rb, line 311
#
# Returns a new instance of the receiver's class, built from the same +form+
# and from the result of calling +excl+ on every contained token set.
#
# wcs:: passed through unchanged to each token set's +excl+
#       (presumably the word classes to exclude -- confirm against TokenSet).
def excl(wcs)
  narrowed = map do |member|
    member.excl(wcs)
  end

  self.class.new(form, narrowed)
end
# File lib/perseus_match/token_set.rb, line 303
#
# Collects the +forms+ of every contained token set, one entry per token
# set. The result is memoized in <tt>@forms</tt> after the first call.
def forms
  @forms ||= map(&:forms)
end
# File lib/perseus_match/token_set.rb, line 307
#
# Returns a new instance of the receiver's class, built from the same +form+
# and from the result of calling +incl+ on every contained token set.
#
# wcs:: passed through unchanged to each token set's +incl+
#       (presumably the word classes to keep -- confirm against TokenSet).
def incl(wcs)
  retained = map do |member|
    member.incl(wcs)
  end

  self.class.new(form, retained)
end
# File lib/perseus_match/token_set.rb, line 315
#
# Returns the soundex variant of this object: a new instance of the same
# class, with the same +form+, whose members are the +soundex+ of each
# contained token set. The result is memoized in <tt>@soundex</tt>.
#
# +ensure_soundex!+ is invoked on every call, before the memoized value is
# consulted (presumably it verifies that soundex support is available --
# confirm at its definition).
def soundex
  ensure_soundex!

  @soundex ||= begin
    encoded = map(&:soundex)
    self.class.new(form, encoded)
  end
end