class LogParser

Constants

LOG_FORMATS
STAT_ENV_MAP

Add any values that you may return here.

Attributes

constraint[R]
known_formats[R]
log_format[R]

Public Class Methods

new(format = nil, constraint = nil) click to toggle source
# File lib/log2counter/vendor/log_parser.rb, line 115
# Builds a parser, optionally pinned to a known log format.
#
# format::     key into the known formats table; when nil, no log format
#              is selected here.
# constraint:: stored as given and exposed through the +constraint+ reader.
def initialize(format = nil, constraint = nil)
  @format, @constraint = format, constraint

  initialize_known_formats

  # Without an explicit format, @log_format is left unset.
  return unless @format

  @log_format = known_formats[@format]
end

Public Instance Methods

check_format(line) click to toggle source

Determines which known log format a single line matches. Formats are tried in order from the most complex (longest) regex to the shortest.

# File lib/log2counter/vendor/log_parser.rb, line 136
# Returns the key of the first known format whose regex matches +line+,
# trying formats from the longest regex source to the shortest.
# Returns :unknown when no format matches.
def check_format(line)
  ranked = @known_formats.sort_by { |_name, fmt| fmt.format_regex.source.size }

  # Longest regex first, so the most specific format wins.
  hit = ranked.reverse.find { |_name, fmt| line.match(fmt.format_regex) }

  hit ? hit.first : :unknown
end
generate_stats(parsed_data) click to toggle source

Populate a stats hash one line at a time. Add extra fields into the STAT_ENV_MAP hash at the top of this file.

# File lib/log2counter/vendor/log_parser.rb, line 175
# Builds the stats hash for one parsed line.
#
# 'PATH_INFO' is always set from the :request field via get_page. Each
# STAT_ENV_MAP entry whose stat name is present in +parsed_data+ is copied
# under its mapped env name (nil values included).
def generate_stats(parsed_data)
  page = get_page(parsed_data[:request])

  STAT_ENV_MAP.each_with_object({ 'PATH_INFO' => page }) do |(field, env_key), stats|
    stats[env_key] = parsed_data[field] if parsed_data.has_key?(field)
  end
end
get_page(request) click to toggle source
# File lib/log2counter/vendor/log_parser.rb, line 185
# Extracts the page from a request string: the text from the first "/" up
# to the next whitespace (or end of string), stripped. When the request
# contains no "/", the whole request is returned stripped.
def get_page(request)
  path = request[/\/.*?(?:\s|\z)/]
  path = request unless path
  path.strip
end
initialize_known_formats() click to toggle source

Builds a LogFormat object for every entry in LOG_FORMATS and stores them, keyed by format name, in the known formats table.

# File lib/log2counter/vendor/log_parser.rb, line 126
# Resets @known_formats and fills it with one LogFormat per LOG_FORMATS
# entry, keyed by the format name.
def initialize_known_formats
  registry = @known_formats = {}

  LOG_FORMATS.each do |name, format|
    registry[name] = LogFormat.new(name, format)
  end
end
parse_io_stream(stream) { |generate_stats(parsed_data)| ... } click to toggle source

This is where the magic happens. This is the end-to-end business logic of the class.

Call with a block that will be called with each line, as a hash.

# File lib/log2counter/vendor/log_parser.rb, line 150
# Parses +stream+ line by line and yields the stats hash (as built by
# generate_stats) for every line that parses successfully.
#
# stream:: any object whose +each+ yields the log lines as strings.
#
# Lines are chomped before processing. When +constraint+ is set, lines that
# do not match it are skipped. A progress marker is written via +warn+ every
# 10000 lines.
#
# A FormatError raised while handling a line is reported via +warn+ and the
# line is skipped. Any other StandardError (including one raised by the
# caller's block) is re-raised as the same class, with the line number, the
# line and the parsed data prepended to the message, and with the original
# backtrace preserved.
def parse_io_stream(stream)
  lines_parsed = 0

  stream.each { |raw_line|
    # Non-destructive chomp: leaves the caller's strings untouched and
    # also works when the stream hands out frozen strings.
    line = raw_line.chomp
    lines_parsed += 1
    warn "##{lines_parsed}" if (lines_parsed % 10000).zero?

    next if constraint && line !~ constraint

    begin
      parsed_data = parse_line(line)
      yield generate_stats(parsed_data)
    rescue FormatError
      warn "Corrupt line [#{lines_parsed}]: #{line.inspect}"
    rescue => err
      # parsed_data is nil here when parse_line itself raised.
      message = "#{err.class} [#{lines_parsed}]: #{line.inspect}\n\n" \
                "#{parsed_data.inspect}\n\n#{err}"

      # Keep the original backtrace so the failure still points at its
      # real origin rather than at this re-raise.
      raise err.class, message, err.backtrace
    end
  }
end
parse_line(line) click to toggle source
# File lib/log2counter/vendor/log_parser.rb, line 189
# Parses a single log line into a hash keyed by the active format's symbols.
#
# When no usable format is cached yet (no @format, or no matching
# @log_format), the format is detected from +line+ via check_format and
# remembered for subsequent calls.
#
# Every line is validated against the active format's regex, not only the
# line used for detection, so a corrupt line later in the input raises
# FormatError instead of silently producing a hash full of nils.
#
# Returns the parsed hash with:
# * :datetime stripped of its first and last character (the brackets),
# * :domain defaulting to :ip when absent,
# * :format set to the active format key.
#
# Raises FormatError when no format is available for the line or the line
# does not match the active format.
def parse_line(line)
  unless @format && @log_format
    @format     = check_format(line)
    @log_format = known_formats[@format]
  end

  # A single match both validates the line and supplies the captures.
  match = log_format && log_format.format_regex.match(line)
  raise FormatError, line unless match

  data = match.captures

  parsed_data = {}
  log_format.format_symbols.each_with_index { |format_symbol, index|
    parsed_data[format_symbol] = data[index]
  }

  # Remove [] from time.
  parsed_data[:datetime] &&= parsed_data[:datetime][1...-1]

  # Add IP as domain if we don't have a domain (virtual host).
  # Assumes we always have an IP.
  parsed_data[:domain] ||= parsed_data[:ip]

  parsed_data[:format] = @format

  parsed_data
end