# File lib/perseus_match/token_set.rb, line 146
#
# Decides whether +form+ names a regular, readable file.
#
# An absolute +form+ is used as given; anything else is expanded with
# File.expand_path (i.e. relative to the current working directory).
#
# Returns the path that was checked when it is a readable regular file,
# otherwise nil. Text that cannot be interpreted as a path at all (it
# contains a NUL byte, or starts with "~name" for an unknown user) also
# yields nil instead of raising ArgumentError, since callers may hand in
# arbitrary text here.
def file?(form)
  path = Pathname.new(form).absolute? ? form : File.expand_path(form)
  path if File.file?(path) && File.readable?(path)
rescue ArgumentError
  # Pathname.new / File.expand_path reject strings that are not valid
  # path names; such input is simply "not a file".
  nil
end
# File lib/perseus_match/token_set.rb, line 191
#
# Builds a token set for +form+.
#
# form::   the text this set belongs to; stored as @form.
# tokens:: the tokens to hold. When omitted (nil/false) they are obtained
#          from the class-level +tokenize+ for +form+.
#
# The tokens are handed to the superclass constructor, and @tokens is set
# to the result of +to_a+ afterwards.
def initialize(form, tokens = nil)
  tokens ||= self.class.tokenize(form)
  super(tokens)

  @form   = form
  @tokens = to_a
end
# File lib/perseus_match/token_set.rb, line 98
#
# Returns the token set for +form+, building the token cache on first use.
#
# form::     the text to tokenize. NOTE: it is downcased IN PLACE, so the
#            caller's string is modified.
# unknowns:: passed through to +parse+.
#
# Behaviour, in order:
#
# 1. If a cache (@tokens) exists, the entry for +form+ is returned.
# 2. Otherwise a fresh cache is created. If the tokens file
#    (ENV['PM_TOKENS_FILE'], default 'perseus.tokens') is readable, it is
#    parsed into the cache, the cache is kept, and the entry for +form+ is
#    returned.
# 3. Otherwise lingo.rb is run inside LINGO_BASE with a temporary config
#    file. Its input is the content of +form+ when +form+ names a readable
#    file (see +file?+), else +form+ itself. If ENV['PM_KEEP_TOKENS'] is
#    set, the raw output is also written to a file: the tokens file when
#    the value is 1/y/yes/true, else the path given by the value.
#    - +form+ was a file: the cache is kept and nil is returned.
#    - +form+ was plain text: the cache is discarded and the entry for
#      +form+ is returned.
#
# Raises RuntimeError when the tokens file is not readable and LINGO_FOUND
# is false.
def tokenize(form, unknowns = false)
  form.downcase!

  # Make sure the ivar exists before testing it, then serve from the cache.
  @tokens ||= nil
  return @tokens[form] if @tokens

  @_tokens = Hash.new
  @tokens  = Hash.new do |cache, key|
    # Forms without parsed tokens get an empty token list.
    cache[key] = new(key, @_tokens[key] || [])
  end

  tokens_file = ENV['PM_TOKENS_FILE'] || 'perseus.tokens'

  if File.readable?(tokens_file)
    File.open(tokens_file) { |io| parse(io, unknowns, @_tokens) }
    return @tokens[form]
  end

  raise "Lingo installation not found at #{LINGO_BASE}" unless LINGO_FOUND

  cfg = Tempfile.open(['perseus_match_lingo', '.cfg']) do |tmp|
    YAML.dump(LINGO_CONFIG, tmp)
  end

  file = file?(form)

  keep = ENV['PM_KEEP_TOKENS']
  if keep
    target = keep =~ /\A(?:1|y(?:es)?|true)\z/ ? tokens_file : keep
    keep   = File.expand_path(target)
  end

  begin
    Dir.chdir(LINGO_BASE) do
      Process.ruby(*%W[lingo.rb -c #{cfg.path}]) do |_, lingo_in, lingo_out, _|
        if file
          File.foreach(file) { |line| lingo_in.puts line }
        else
          lingo_in.puts(form)
        end

        lingo_in.close_write

        raw = lingo_out.read
        File.open(keep, 'w') { |io| io.puts raw } if keep

        parse(raw, unknowns, @_tokens)
      end
    end
  ensure
    cfg.unlink
  end

  # A file run leaves the cache in place and has no single result.
  return if file

  # A plain-text run answers once and drops the cache again.
  result  = @tokens[form]
  @tokens = nil
  result
end
# File lib/perseus_match/token_set.rb, line 153
#
# Reads analysis output line by line and collects tokens per form.
#
# output::   anything responding to +each_line+ (tokenize passes a String
#            or an open File).
# unknowns:: when truthy, forms whose token answers true to +unk?+ are
#            reported: appended to +unknowns+ if it responds to <<,
#            otherwise printed via +warn+.
# tokens::   hash to fill (form => array of tokens); also the return value.
#
# Recognised lines:
#
#   <form = [(tok) (tok+) ...]>   one Token per parenthesised entry, with a
#                                 trailing "+" dropped
#   <form>  or  :form:            a single Token built from the downcased
#                                 form
#
# Keys have Token::WC_RE stripped and are downcased. Entries already
# present in +tokens+ are never overwritten. Other lines are ignored.
def parse(output, unknowns = false, tokens = {})
  normalize = lambda do |str|
    str.sub!(Token::WC_RE, '')
    str.downcase!
  end

  output.each_line do |line|
    if (m = /<(.*?)\s=\s\[(.*)\]>/.match(line))
      key      = m[1]
      analyses = m[2]

      normalize[key]
      tokens[key] ||= analyses.scan(/\((.*?)\+?\)/).flatten.map { |t| Token.new(t) }
    elsif (m = /<(.*)>/.match(line) || /:(.*):/.match(line))
      key   = m[1]
      # The token is built from the raw downcased form, before the key is
      # normalized.
      token = Token.new(m[1].downcase)

      normalize[key]

      if unknowns && token.unk?
        if unknowns.respond_to?(:<<)
          unknowns << key
        else
          warn "UNK: #{key} [#{line.strip}]"
        end
      end

      tokens[key] ||= [token]
    end
  end

  tokens
end
# File lib/perseus_match/token_set.rb, line 231
#
# Two token sets are equal when their +tokens+ are equal; the form is not
# taken into account (see +eql?+ for the stricter check).
#
# Objects that do not expose +tokens+ (nil, plain arrays, ...) are simply
# unequal; comparing against them returns false instead of raising
# NoMethodError.
#
# NOTE(review): assumes +tokens+ is a public reader on this class --
# confirm, since respond_to? ignores protected/private methods.
def ==(other)
  other.respond_to?(:tokens) && tokens == other.tokens
end
# File lib/perseus_match/token_set.rb, line 206
#
# True when this set and +other+ have no form in common. Both form lists
# are flattened before they are intersected.
def disjoint?(other)
  mine   = forms.flatten
  theirs = other.forms.flatten

  # The intersection of two flat arrays is flat already.
  (mine & theirs).empty?
end
# File lib/perseus_match/token_set.rb, line 198
#
# Number of distinct forms found in exactly one of the two sets: the size
# of the union of both form lists minus the size of their intersection.
def distance(other)
  mine   = forms
  theirs = other.forms

  combined = mine | theirs
  shared   = mine & theirs

  combined.size - shared.size
end
# File lib/perseus_match/token_set.rb, line 235
#
# Strict equality: +other+ must be == to this set (see +==+) and must
# have the same form as well.
def eql?(other)
  return false unless self == other

  form == other.form
end
# File lib/perseus_match/token_set.rb, line 218
#
# Returns a new set of the same class and form that holds only the tokens
# which do NOT match +wcs+ (as decided by each token's +match?+).
def excl(wcs)
  remaining = reject { |token| token.match?(wcs) }
  self.class.new(form, remaining)
end
# File lib/perseus_match/token_set.rb, line 202
#
# The +form+ of every token in this set, in order. The list is computed
# once and then memoized in @forms.
def forms
  @forms ||= map(&:form)
end
# File lib/perseus_match/token_set.rb, line 214
#
# Returns a new set of the same class and form that holds only the tokens
# which match +wcs+ (as decided by each token's +match?+).
def incl(wcs)
  matching = select { |token| token.match?(wcs) }
  self.class.new(form, matching)
end
# File lib/perseus_match/token_set.rb, line 210
#
# Applies +incl+ and then +excl+ in one go.
#
# inclexcl:: hash with optional keys
#            :incl -- passed to +incl+; Token::ANY_WC when missing/nil
#            :excl -- passed to +excl+ as is (nil when missing)
#
# Returns the resulting token set.
def inclexcl(inclexcl = {})
  included = incl(inclexcl[:incl] || Token::ANY_WC)
  included.excl(inclexcl[:excl])
end
# File lib/perseus_match/token_set.rb, line 239
#
# The superclass's inspect output followed by the form in angle brackets.
def inspect
  base = super
  "#{base}<#{form}>"
end
# File lib/perseus_match/token_set.rb, line 222
#
# Returns a token set (same class, same form) in which every token's form
# is replaced by its Soundex code while its +wc+ is kept. The result is
# memoized in @soundex.
#
# Before encoding, diacritics are replaced and the first run of non-word
# characters is removed from the token's form. A token for which
# Text::Soundex.soundex returns nil gets the empty string as its code.
#
# Raises RuntimeError (via +ensure_soundex!+) when Text::Soundex is not
# available; this check runs on every call.
def soundex
  ensure_soundex!

  @soundex ||= begin
    encoded = map do |token|
      plain = token.form.replace_diacritics.sub(/\W+/, '')
      Token.new(Text::Soundex.soundex(plain) || '', token.wc)
    end

    self.class.new(form, encoded)
  end
end
# File lib/perseus_match/token_set.rb, line 247
#
# Guard for Soundex-based features: does nothing (returns nil) when
# Text::Soundex is defined, otherwise raises RuntimeError. The backtrace
# passed to +raise+ is caller(1).
def ensure_soundex!
  return if defined?(Text::Soundex)

  raise RuntimeError, "Soundex functionality not available", caller(1)
end