1 分析日志的Python框架awk.py
#
# Custom awk.py module
#


class controller:
    """Drive a set of line handlers over a file-like object, awk-style.

    Every subscribed handler must provide ``begin()``, ``process_line(s)``,
    ``end()``, ``description()`` and ``result()``; those are the only
    methods this class calls on it.
    """

    def __init__(self, f):
        """Store the input ``f`` (any object with ``readline()``); no handlers yet."""
        self.m_file = f
        self.m_handlers = []

    def subscribe(self, o):
        """Register handler ``o``; handlers are invoked in subscription order."""
        self.m_handlers.append(o)

    def run(self):
        """Run every handler over the whole input.

        Calls ``begin()`` on each handler, then passes each line (as returned
        by ``readline()``, line ending included) to every handler's
        ``process_line``, and finally calls ``end()`` on each handler.
        """
        for o in self.m_handlers:
            o.begin()

        while True:
            s = self.m_file.readline()
            # Truthiness instead of `s != ""`: end of input is "" for text
            # streams and b"" for binary ones, and b"" != "" is always true
            # on Python 3, which would loop forever.
            if not s:
                break
            for o in self.m_handlers:
                o.process_line(s)

        for o in self.m_handlers:
            o.end()

    def print_results(self):
        """Print a "Results:" header, then each handler's description and result."""
        rule = "------------------------------------------------------"

        # print("") rather than a bare `print`: it emits exactly one empty
        # line whether this runs as a Python 2 print statement or a
        # Python 3 function call.
        print("")
        print("Results:")
        print("")
        for o in self.m_handlers:
            print(rule)
            print(o.description())
            print(rule)
            print(o.result())
# count_lines.py: count the hits recorded in a log (one input line per hit)

# Standard sys module
import sys

# Custom awk.py module
import awk


class count_lines:
    """Handler for awk.controller that tallies how many lines it is fed."""

    def begin(self):
        """Start the tally at zero."""
        self.m_count = 0

    def process_line(self, s):
        """Count one more line; the text ``s`` itself is not inspected."""
        self.m_count = self.m_count + 1

    def end(self):
        """Nothing to finalize once the input is exhausted."""

    def description(self):
        """Return the label shown above the result."""
        return "# of lines in the file"

    def result(self):
        """Return the number of lines counted since ``begin()``."""
        return self.m_count


# Step 1: Create the Awk controller (input comes from standard input)
ac = awk.controller(sys.stdin)

# Step 2: Subscribe the handler
ac.subscribe(count_lines())

# Step 3: Run
ac.run()

# Step 4: Print the results
ac.print_results()
使用方法：在shell中执行以下命令
# cat apachelog.log|python count_lines.py
统计浏览次数超过n次的访问者 visitors.py
How many people have returned to the site more than N times?