class PerseusMatch

Constants

DEFAULT_COEFF
DISTANCE_SPEC
Infinity
VERSION

Attributes

default_coeff[R]
distance_spec[R]
phrase[R]
target[R]
verbose[R]

Public Class Methods

check(*args) click to toggle source
# File lib/perseus_match.rb, line 69
# Non-raising variant of +check!+.
#
# Forwards all arguments to +check!+ and returns its result; when
# +check!+ raises CheckFailedError, returns +false+ instead.
def check(*args)
  begin
    check!(*args)
  rescue CheckFailedError
    false
  end
end
check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity) click to toggle source
# File lib/perseus_match.rb, line 75
# Builds a match for +phrase+ and +target+ and compares one of its
# attributes against +threshold+.
#
# phrase, target:: passed to +new+ together with +pm_options+
# threshold::      right-hand operand of the comparison (default 0)
# operator::       comparison method sent to the attribute value (default :>)
# pm_options::     options hash forwarded to +new+
# attribute::      method sent to the match to obtain the value
#                  (default :similarity)
#
# Returns a struct with the members +pm+, +value+, +threshold+ and
# +operator+ when the comparison is truthy; otherwise raises
# CheckFailedError, constructed with the same four values.
def check!(phrase, target, threshold = 0, operator = :>, pm_options = {}, attribute = :similarity)
  pm = new(phrase, target, pm_options)
  value = pm.send(attribute)

  unless value.send(operator, threshold)
    raise CheckFailedError.new(pm, value, threshold, operator)
  end

  # Build the result class once and reuse it: creating a new anonymous
  # Struct class on every successful call allocates a class per check and
  # makes results of different calls belong to unrelated classes.
  @check_result_class ||= Struct.new(:pm, :value, :threshold, :operator)
  @check_result_class.new(pm, value, threshold, operator)
end
cluster(phrases, options = {}, pm_options = {}) click to toggle source
# File lib/perseus_match.rb, line 65
# Builds a Cluster from +phrases+ (with +pm_options+) and returns the
# result of ranking it with +options+.
def cluster(phrases, options = {}, pm_options = {})
  clustered = Cluster.new(phrases, pm_options)
  clustered.rank(options)
end
distance(*args) click to toggle source
# File lib/perseus_match.rb, line 57
# Convenience shortcut: instantiates a match from +args+ (same arguments
# as +new+) and returns its distance.
def distance(*args)
  pm = new(*args)
  pm.distance
end
match(phrases, pm_options = {}) click to toggle source
# File lib/perseus_match.rb, line 61
# Returns a new List built from +phrases+ and +pm_options+.
def match(phrases, pm_options = {})
  return List.new(phrases, pm_options)
end
new(phrase, target, options = {}) click to toggle source
# File lib/perseus_match.rb, line 98
# Sets up a match between +phrase+ and +target+.
#
# Both values are converted with +to_s+ and passed through +sanitize+.
# Recognised +options+ keys:
#
# :default_coeff:: stored as +default_coeff+; DEFAULT_COEFF when missing or falsy
# :distance_spec:: stored as +distance_spec+; DISTANCE_SPEC when missing or falsy
# :verbose::       stored as +verbose+
def initialize(phrase, target, options = {})
  # Cache of similarity values, keyed by coefficient.
  @similarity = {}
  @verbose    = options[:verbose]

  coeff = options[:default_coeff]
  spec  = options[:distance_spec]

  @default_coeff = coeff ? coeff : DEFAULT_COEFF
  @distance_spec = spec ? spec : DISTANCE_SPEC

  @phrase = sanitize(phrase.to_s)
  @target = sanitize(target.to_s)
end
tokenize(form, unknowns = false) click to toggle source
# File lib/perseus_match.rb, line 86
# Tokenizes +form+.
#
# When TokenSet.file?(form) returns a truthy value, that value is handed
# to TokenSet.tokenize; otherwise +form+ itself is handed to
# PhraseTokenSet.tokenize. +unknowns+ is forwarded unchanged in both cases.
def tokenize(form, unknowns = false)
  file = TokenSet.file?(form)
  return PhraseTokenSet.tokenize(form, unknowns) unless file

  TokenSet.tokenize(file, unknowns)
end

Public Instance Methods

distance() click to toggle source

Returns the distance between phrase and target, where 0 <= distance <= Infinity.

# File lib/perseus_match.rb, line 119
# Returns the distance, computing it via +calculate_distance+ on first
# use and reusing the stored value afterwards.
def distance
  return @distance if defined?(@distance) && @distance

  @distance = calculate_distance
end
phrase_tokens() click to toggle source
# File lib/perseus_match.rb, line 110
# Returns the tokens of +phrase+, obtained from the class-level
# +tokenize+ on first use and cached afterwards.
def phrase_tokens
  return @phrase_tokens if defined?(@phrase_tokens) && @phrase_tokens

  @phrase_tokens = self.class.tokenize(phrase)
end
similarity(coeff = nil) click to toggle source

Returns the similarity for the given coefficient (default_coeff when none is given), where 1 >= similarity >= 0.

# File lib/perseus_match.rb, line 124
# Returns the similarity for +coeff+, cached per coefficient.
#
# A nil (or false) +coeff+ is replaced by +default_coeff+. The value is
# computed with +normalize_distance+ the first time a coefficient is seen.
def similarity(coeff = nil)
  coeff = default_coeff unless coeff  # passed arg may be nil

  cached = @similarity[coeff]
  return cached if cached

  @similarity[coeff] = normalize_distance(coeff)
end
target_tokens() click to toggle source
# File lib/perseus_match.rb, line 114
# Returns the tokens of +target+, obtained from the class-level
# +tokenize+ on first use and cached afterwards.
def target_tokens
  return @target_tokens if defined?(@target_tokens) && @target_tokens

  @target_tokens = self.class.tokenize(target)
end

Private Instance Methods

calculate_distance() click to toggle source
# File lib/perseus_match.rb, line 135
# Computes the raw distance between the phrase and target token sets.
#
# Returns Infinity when the two token sets are disjoint and 0 when they
# are +eql?+. Otherwise walks +distance_spec+ (pairs of options and
# weight) and adds up +token_distance(options)+ times weight.
def calculate_distance
  if phrase_tokens.disjoint?(target_tokens)
    Infinity
  elsif phrase_tokens.eql?(target_tokens)
    0
  else
    weighted = 0
    distance_spec.each do |options, weight|
      weighted += token_distance(options) * weight
    end
    weighted
  end
end
normalize_distance(coeff) click to toggle source
# File lib/perseus_match.rb, line 170
# Maps +distance+ to a similarity value using +coeff+.
#
# Returns 0 when phrase and target have no tokens at all, and 1.0 when
# the distance is zero. Otherwise the result is
# 1 / exp(distance / norm), where norm is
# log(length ** sqrt(2)) * coeff * total_weight * e and length is the
# combined number of phrase and target tokens.
def normalize_distance(coeff)
  length = phrase_tokens.size + target_tokens.size
  return 0 if length == 0

  # A zero distance means full similarity whatever the normaliser is.
  # Answering here avoids 0 / 0.0 => NaN when the normaliser is zero
  # (zero coeff, zero total weight, or a combined length of 1).
  return 1.0 if distance.zero?

  norm = Math.log(length ** Math.sqrt(2)) * coeff * total_weight * Math::E

  1 / Math.exp(distance / norm)
end
sanitize(str) click to toggle source
# File lib/perseus_match.rb, line 131
# Strips annotations from +str+ and returns the cleaned copy.
#
# First removes every parenthesised or bracketed group together with
# the whitespace in front of it, then cuts the string at the first "/"
# or ":" (again including preceding whitespace) up to the end of that line.
def sanitize(str)
  without_groups = str.gsub(/\s*\(.*?\)|\s*\[.*?\]/, '')
  without_groups.sub(/\s*[\/:].*/, '')
end
token_distance(options = {}) click to toggle source
# File lib/perseus_match.rb, line 144
  # Computes the distance between the phrase and target tokens for one
  # set of +options+.
  #
  # Both token sets are first passed through +inclexcl(options)+. They
  # are then sorted when options[:sort] is set and converted with
  # +soundex+ when options[:soundex] is set, before the first set's
  # +distance+ to the second is taken. With +verbose+ enabled, the
  # options, both token sets and the result are reported via +warn+.
  def token_distance(options = {})
    tokens1, tokens2 = phrase_tokens.inclexcl(options), target_tokens.inclexcl(options)

    tokens1, tokens2 = tokens1.sort, tokens2.sort if options[:sort]
    tokens1, tokens2 = tokens1.soundex, tokens2.soundex if options[:soundex]

    distance = tokens1.distance(tokens2)

    if verbose
      warn "#{options.inspect}:
  #{tokens1.inspect}
  #{tokens2.inspect}
=> #{distance}
"
    end

    distance
  end
total_weight() click to toggle source
# File lib/perseus_match.rb, line 179
# Returns the sum (as a Float) of the weights in +distance_spec+,
# computed on first use and cached afterwards.
def total_weight
  return @total_weight if defined?(@total_weight) && @total_weight

  sum = 0.0
  distance_spec.each { |_, weight| sum += weight }
  @total_weight = sum
end