class PerseusMatch::PhraseTokenSet

Public Class Methods

tokenize(form, unknowns = false) click to toggle source
# File lib/perseus_match/token_set.rb, line 259
# Returns the phrase token set for +form+, building it on first use.
#
# +form+ is scanned with PRINTABLE_CHAR_RE; each match is passed to
# TokenSet.tokenize along with +unknowns+, and the collected results are
# wrapped via +new(form, ...)+. The instance is stored in the @tokens hash
# on the receiver under the key +form+ and reused on later calls.
#
# NOTE(review): the cache key is +form+ alone, so +unknowns+ only takes
# effect on the first call for a given +form+ -- confirm this is intended.
def tokenize(form, unknowns = false)
  @tokens ||= {}
  @tokens[form] ||= begin
    pieces = form.scan(PRINTABLE_CHAR_RE)
    new(form, pieces.map { |piece| TokenSet.tokenize(piece, unknowns) })
  end
end

Public Instance Methods

distance(other) click to toggle source

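Bounds on the result: (size1 - size2).abs <= distance <= [size1, size2].max, where size1 is the number of token sets in the receiver and size2 the number in other.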

# File lib/perseus_match/token_set.rb, line 271
# Levenshtein-style edit distance between the receiver's token_sets and
# those of +other+.
#
# Inserting or deleting a token set costs 1; substituting one token set for
# another costs whatever +token_set.distance(other_token_set)+ returns.
# If either side has no token sets, the size of the other side is returned.
#
# Only one row of the cost table is kept: +row+ holds the previous row and
# is overwritten in place while the current row is being computed.
def distance(other)
  left, right = token_sets, other.token_sets
  left_size, right_size = left.size, right.size

  return right_size if left_size.zero?
  return left_size if right_size.zero?

  result = nil
  row = (0..right_size).to_a

  left_size.times do |i|
    left_set = left[i]
    # Value of the current row at column j; starts as the first-column cost.
    current = i + 1

    right_size.times do |j|
      penalty = left_set.distance(right[j])

      result = [
        row[j + 1] + 1,   # insertion
        current + 1,      # deletion
        row[j] + penalty  # substitution
      ].min

      # Slot j is no longer needed from the previous row; store the
      # current-row value there, then advance to the next column.
      row[j] = current
      current = result
    end

    row[right_size] = result
  end

  result
end
excl(wcs) click to toggle source
# File lib/perseus_match/token_set.rb, line 311
# Returns a new instance of the receiver's class, built from the same
# +form+ and from the result of calling +excl(wcs)+ on every contained
# token set. The receiver itself is not reassigned.
def excl(wcs)
  narrowed = map { |member| member.excl(wcs) }
  self.class.new(form, narrowed)
end
forms() click to toggle source
# File lib/perseus_match/token_set.rb, line 303
# Collects +forms+ from every contained token set, in order.
# The resulting list is memoized in @forms, so later calls return the
# same object.
def forms
  @forms ||= map(&:forms)
end
incl(wcs) click to toggle source
# File lib/perseus_match/token_set.rb, line 307
# Returns a new instance of the receiver's class, built from the same
# +form+ and from the result of calling +incl(wcs)+ on every contained
# token set. The receiver itself is not reassigned.
def incl(wcs)
  restricted = map { |member| member.incl(wcs) }
  self.class.new(form, restricted)
end
soundex() click to toggle source
# File lib/perseus_match/token_set.rb, line 315
# Returns a new instance of the receiver's class, built from the same
# +form+ and from the +soundex+ value of every contained token set.
#
# +ensure_soundex!+ runs on every call, before the cache is consulted
# (presumably it checks that Soundex support is available -- confirm).
# The built instance is memoized in @soundex.
def soundex
  ensure_soundex!
  @soundex ||= begin
    codes = map(&:soundex)
    self.class.new(form, codes)
  end
end