python 统计nginx日志的访问ip和流量

清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>

#!/bin/env python
#coding:utf-8
#统计nginx的访问ip和流量
#具有时间段分析功能
import sys
import ip_location
import time
import re
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again
# (site.py removes it at interpreter start-up) so that implicit str/unicode
# conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')
# The start/end of the analysed time window are taken from sys.argv in the
# __main__ block at the bottom of the file.
# Per-IP accumulators filled by ParseLog.show():
#   ipflow: ip -> sum of field 10 of every matching log line (the traffic)
#   ipnum:  ip -> number of matching log lines (the hit count)
ipflow={}
ipnum={}
# Path of the nginx log file to analyse.
log_file="/data/logs/lolo.log"
# Regular expression that locates a timestamp such as 10/Oct/2020:13:55:36
# inside a log line, and the matching strptime format used to parse it.
re_time='\d{2}\/\w{3}\/\d{4}:\d{2}:\d{2}:\d{2}'
str_time='%d/%b/%Y:%H:%M:%S'

#"时间段"
class TimeParser(object):
    """Extract the timestamp of a log line and test it against a time window.

    re_time  -- regular expression (string or compiled pattern) that matches
                the timestamp text inside a log line.
    str_time -- time.strptime format used to parse the matched text and the
                window bounds.
    """

    def __init__(self, re_time, str_time):
        # re.compile accepts an already compiled pattern as well as a string,
        # so callers may pass either.
        self.re_time = re.compile(re_time)
        self.str_time = str_time
        # Window bounds already converted to epoch seconds, keyed by their
        # text form, so they are parsed once instead of once per log line.
        self._bounds = {}

    def _to_epoch(self, stamp):
        """Convert timestamp text in self.str_time format to epoch seconds."""
        return time.mktime(time.strptime(stamp, self.str_time))

    def _bound(self, stamp):
        """Return the epoch value of a window bound, parsing it only once."""
        try:
            return self._bounds[stamp]
        except KeyError:
            value = self._bounds[stamp] = self._to_epoch(stamp)
            return value

    def get(self, line):
        """Return the timestamp found in *line* as epoch seconds (local time).

        Raises AttributeError when the line contains no timestamp (the regex
        search returns None) and ValueError when the matched text does not
        fit self.str_time.
        """
        return self._to_epoch(self.re_time.search(line).group(0))

    def inPeriod(self, line, start=None, end=None):
        """Return True when the timestamp of *line* lies strictly inside the window.

        start, end -- window bounds as text in self.str_time format.  When
                      omitted, the module-level globals ``start_time`` and
                      ``end_time`` are used, which is what existing callers
                      rely on.

        Both bounds are exclusive.  A line without any timestamp is reported
        as outside the window instead of aborting the whole run.
        """
        if start is None:
            start = start_time
        if end is None:
            end = end_time
        found = self.re_time.search(line)
        if found is None:
            return False
        t = self._to_epoch(found.group(0))
        return self._bound(start) < t < self._bound(end)
#处理函数
class ParseLog(object):
    """Aggregate per-IP hit counts and traffic from a log file and print them.

    file_name -- path of the log file to read.

    Results are accumulated in the module-level dicts ``ipnum`` (hits per IP)
    and ``ipflow`` (summed traffic per IP).
    """

    def __init__(self, file_name):
        self.file_name = file_name
        self.re_time = re.compile(re_time)
        # "srt_time" is a misspelling of "str_time"; the attribute name is
        # kept so existing code that reads it keeps working.
        self.srt_time = str_time

    def show(self):
        """Scan the log, update ``ipnum``/``ipflow`` and print one line per IP.

        Only lines accepted by TimeParser.inPeriod are counted.  The IP is
        taken from whitespace-separated field 1 and the traffic from field 10.
        Lines with fewer fields or a non-numeric traffic field are skipped.
        """
        Time = TimeParser(self.re_time, self.srt_time)
        # Stream the file line by line; "with" closes it even on errors.
        with open(self.file_name, "r") as fd:
            for line in fd:
                if not Time.inPeriod(line):
                    continue
                fields = line.split()
                try:
                    ip = fields[1]
                    flow = int(fields[10])
                except (IndexError, ValueError):
                    # Malformed line: not enough fields or traffic is not a number.
                    continue
                # Direct dict lookups replace the per-line rebuilt set of
                # lower-cased keys, which was quadratic and mis-counted any
                # IP containing upper-case characters.
                ipnum[ip] = ipnum.get(ip, 0) + 1
                ipflow[ip] = ipflow.get(ip, 0) + flow
        for k in ipnum:
            name = ip_location.ip_location(k)
            # NOTE(review): the value is printed with a "K" suffix but is never
            # divided by 1024 -- confirm the unit of field 10 before changing.
            # A single parenthesised argument prints identically on Python 2.
            print("访问IP:%s 访问次数:%d 访问流量%.3fK 归属地:%s" % (k, int(ipnum[k]), ipflow[k], name))
if __name__ == "__main__":
    # Exactly two arguments are expected: the start and the end of the time
    # window, both written in the str_time format.
    if len(sys.argv) != 3:
        print("输入的参数错误")
        # Exit with a non-zero status so callers can detect the usage error.
        sys.exit(1)
    # These stay module-level globals because TimeParser.inPeriod reads them.
    start_time = sys.argv[1]
    end_time = sys.argv[2]
    p = ParseLog(log_file)
    p.show()