Add any values that you may return here.
# File lib/log2counter/vendor/log_parser.rb, line 115
#
# Builds a parser.
#
# format::     optional key of a known log format; when given, the matching
#              entry of +known_formats+ is stored in <tt>@log_format</tt>.
# constraint:: optional value stored in <tt>@constraint</tt>
#              (presumably a pattern exposed through a +constraint+ reader
#              and used to filter lines -- confirm against the class header).
def initialize(format = nil, constraint = nil)
  @format, @constraint = format, constraint

  # The format table has to exist before the lookup below.
  initialize_known_formats

  return unless @format

  @log_format = known_formats[@format]
end
Determines which known log format a single line of the log file conforms to. The most complex (longest) regexes are tried first.
# File lib/log2counter/vendor/log_parser.rb, line 136
#
# Returns the key of the first known format whose regex matches +line+,
# trying the formats in order of decreasing regex source length.
# Returns <tt>:unknown</tt> when no format matches.
def check_format(line)
  longest_first = @known_formats.sort_by { |_name, candidate|
    candidate.format_regex.source.size
  }.reverse

  hit = longest_first.find { |_name, candidate| line.match(candidate.format_regex) }

  hit ? hit.first : :unknown
end
Populate a stats hash one line at a time. Add extra fields into the STAT_ENV_MAP hash at the top of this file.
# File lib/log2counter/vendor/log_parser.rb, line 175
#
# Builds the stats hash for one parsed line.
#
# The hash always contains 'PATH_INFO' (derived from the :request field via
# +get_page+). Every STAT_ENV_MAP entry whose field is present in
# +parsed_data+ is then copied over under its mapped name.
def generate_stats(parsed_data)
  page = get_page(parsed_data[:request])

  STAT_ENV_MAP.each_with_object({ 'PATH_INFO' => page }) do |(field, env_key), env|
    next unless parsed_data.has_key?(field)

    env[env_key] = parsed_data[field]
  end
end
# File lib/log2counter/vendor/log_parser.rb, line 185
#
# Extracts the page from a request string: the text from the first "/" up to
# the next whitespace (or the end of the string), with surrounding whitespace
# stripped. Falls back to the whole stripped request when it contains no "/".
def get_page(request)
  page = request.slice(/\/.*?(?:\s|\z)/)
  page = request if page.nil?
  page.strip
end
Processes the format string into symbols and test regex and saves using LogFormat class.
# File lib/log2counter/vendor/log_parser.rb, line 126
#
# Resets <tt>@known_formats</tt> and fills it with one LogFormat per entry
# of LOG_FORMATS, keyed by the format's name.
def initialize_known_formats
  @known_formats = {}

  LOG_FORMATS.each do |format_name, format_string|
    @known_formats.store(format_name, LogFormat.new(format_name, format_string))
  end
end
This is where the magic happens. This is the end-to-end business logic of the class.
Call with a block that will be called with each line, as a hash.
# File lib/log2counter/vendor/log_parser.rb, line 150
#
# Parses +stream+ line by line and yields one stats hash (as produced by
# +generate_stats+) per successfully parsed line.
#
# stream:: any object whose +each+ yields the lines of the log. The lines
#          themselves are not modified.
#
# * Prints a progress marker to stderr every 10000 lines.
# * Skips lines that do not match +constraint+, when one is set.
# * A FormatError for a line is reported on stderr and the line is skipped.
# * Any other StandardError (including one raised by the caller's block) is
#   re-raised as the same class, with the line number, the line and the
#   parsed data prepended to the message; the original backtrace is kept.
#
# Returns whatever <tt>stream.each</tt> returns.
def parse_io_stream(stream)
  lines_parsed = 0

  stream.each do |raw_line|
    # Work on a copy so frozen or caller-owned strings are left untouched.
    line = raw_line.chomp
    lines_parsed += 1

    warn "##{lines_parsed}" if (lines_parsed % 10000).zero?

    next if constraint && line !~ constraint

    begin
      parsed_data = parse_line(line)
      yield generate_stats(parsed_data)
    rescue FormatError
      warn "Corrupt line [#{lines_parsed}]: #{line.inspect}"
    rescue => err
      # Same class and message as before, but pass the original backtrace on
      # so the failure still points at the code that actually raised.
      raise err.class,
            "#{err.class} [#{lines_parsed}]: #{line.inspect}\n\n#{parsed_data.inspect}\n\n#{err}",
            err.backtrace
    end
  end
end
# File lib/log2counter/vendor/log_parser.rb, line 189
#
# Parses one log line into a hash keyed by the current format's symbols.
#
# If no usable format is set yet, it is detected from +line+ via
# +check_format+ and remembered for subsequent lines.
#
# The returned hash additionally has:
# * <tt>:datetime</tt> with its first and last character (the surrounding
#   brackets) removed, when present;
# * <tt>:domain</tt> defaulting to the value of <tt>:ip</tt>;
# * <tt>:format</tt> set to the key of the format in use.
#
# Raises FormatError (with the line as message) when no format can be found
# for the line, or when the line does not match the format in use. The
# second check applies to every line, not only to the one the format was
# detected from, so a corrupt line in the middle of a file is reported as
# such instead of being returned as a hash of nils.
def parse_line(line)
  unless @format && @log_format
    @format = check_format(line)
    @log_format = known_formats[@format]
  end

  # Detection failed: check_format found no match, so there is no entry to
  # look up (assumes known_formats has no :unknown key).
  raise FormatError, line unless log_format

  data = line.scan(log_format.format_regex).flatten

  # String#scan returns [] exactly when the regex does not match the line.
  raise FormatError, line if data.empty?

  parsed_data = {}
  log_format.format_symbols.each_with_index do |format_symbol, index|
    parsed_data[format_symbol] = data[index]
  end

  # Remove [] from time.
  parsed_data[:datetime] &&= parsed_data[:datetime][1...-1]

  # Add IP as domain if we don't have a domain (virtual host).
  # Assumes we always have an IP.
  parsed_data[:domain] ||= parsed_data[:ip]

  parsed_data[:format] = @format
  parsed_data
end