手机版
你好,游客 登录 注册
背景:
阅读新闻

Apache/Nginx 访问日志分析脚本

[日期:2013-05-25] 来源:Linux社区  作者:linux5588 [字体: ]

Apache访问日志分析脚本【第二版】

  1. #!/usr/bin/env python
  2. # coding=utf-8
  3. #------------------------------------------------------
  4. # Name: Apache 日志分析脚本
  5. # Purpose: 此脚本只用来分析Apache的访问日志
  6. # Version: 2.0
  7. # Author: LEO
  8. # BLOG: http://www.linuxidc.com
  9. # EMAIL: chanyipiaomiao@163.com
  10. # Created: 2013-4-26
  11. # Modified: 2013-5-4
  12. # Copyright: (c) LEO 2013
  13. #------------------------------------------------------
  14. import sys
  15. import time
  16. #该类是用来打印格式
  17. class displayFormat(object):
  18. def format_size(self,size):
  19. '''''格式化流量单位'''
  20. KB = 1024
  21. MB = 1048576
  22. GB = 1073741824
  23. TB = 1099511627776
  24. if size >= TB :
  25. size = str(size / TB) + 'T'
  26. elif size < KB :
  27. size = str(size) + 'B'
  28. elif size >= GB and size < TB:
  29. size = str(size / GB) + 'G'
  30. elif size >= MB and size < GB :
  31. size = str(size / MB) + 'M'
  32. else :
  33. size = str(size / KB) + 'K'
  34. return size
  35. formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'
  36. def transverse_line(self) :
  37. '''''输出横线'''
  38. printself.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)
  39. def head(self):
  40. '''''输出头部信息'''
  41. printself.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')
  42. def error_print(self) :
  43. '''''输出错误信息'''
  44. print
  45. print'Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]'
  46. print
  47. sys.exit(1)
  48. def execut_time(self):
  49. '''''输出脚本执行的时间'''
  50. print
  51. print"Script Execution Time: %.3f second" % time.clock()
  52. print
  53. #该类是用来生成主机信息的字典
  54. class hostInfo(object):
  55. host_info = ['200','404','500','302','304','503','403','times','size']
  56. def __init__(self,host):
  57. self.host = host = {}.fromkeys(self.host_info,0)
  58. def increment(self,status_times_size,is_size):
  59. '''''该方法是用来给host_info中的各个值加1'''
  60. if status_times_size == 'times':
  61. self.host['times'] += 1
  62. elif is_size:
  63. self.host['size'] = self.host['size'] + status_times_size
  64. else:
  65. self.host[status_times_size] += 1
  66. def get_value(self,value):
  67. '''''该方法是取到各个主机信息中对应的值'''
  68. returnself.host[value]
  69. #该类是用来分析文件
  70. class fileAnalysis(object):
  71. def __init__(self):
  72. '''''初始化一个空字典'''
  73. self.report_dict = {}
  74. self.total_request_times,self.total_traffic,self.total_200, \
  75. self.total_404,self.total_500,self.total_403,self.total_302, \
  76. self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0
  77. def split_eachline_todict(self,line):
  78. '''''分割文件中的每一行,并返回一个字典'''
  79. split_line = line.split()
  80. split_dict = {'remote_host':split_line[0],'status':split_line[-2],'bytes_sent':split_line[-1],}
  81. return split_dict
  82. def generate_log_report(self,logfile):
  83. '''''读取文件,分析split_eachline_todict方法生成的字典'''
  84. for line in logfile:
  85. try:
  86. line_dict = self.split_eachline_todict(line)
  87. host = line_dict['remote_host']
  88. status = line_dict['status']
  89. except ValueError :
  90. continue
  91. except IndexError :
  92. continue
  93. if host notinself.report_dict :
  94. host_info_obj = hostInfo(host)
  95. self.report_dict[host] = host_info_obj
  96. else :
  97. host_info_obj = self.report_dict[host]
  98. host_info_obj.increment('times',False)
  99. if status in host_info_obj.host_info :
  100. host_info_obj.increment(status,False)
  101. try:
  102. bytes_sent = int(line_dict['bytes_sent'])
  103. except ValueError:
  104. bytes_sent = 0
  105. host_info_obj.increment(bytes_sent,True)
  106. returnself.report_dict
  107. def return_sorted_list(self,true_dict):
  108. '''''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''
  109. for host_key in true_dict :
  110. host_value = true_dict[host_key]
  111. times = host_value.get_value('times')
  112. self.total_request_times = self.total_request_times + times
  113. size = host_value.get_value('size')
  114. self.total_traffic = self.total_traffic + size
  115. o200 = host_value.get_value('200')
  116. o404 = host_value.get_value('404')
  117. o500 = host_value.get_value('500')
  118. o403 = host_value.get_value('403')
  119. o302 = host_value.get_value('302')
  120. o304 = host_value.get_value('304')
  121. o503 = host_value.get_value('503')
  122. true_dict[host_key] = {'200':o200,'404':o404,'500':o500,'403':o403,'302':o302,'304':o304, \
  123. '503':o503,'times':times,'size':size}
  124. self.total_200 = self.total_200 + o200
  125. self.total_404 = self.total_404 + o404
  126. self.total_500 = self.total_500 + o500
  127. self.total_302 = self.total_302 + o302
  128. self.total_304 = self.total_304 + o304
  129. self.total_503 = self.total_503 + o503
  130. sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],t[1]['size']),reverse=True)
  131. return sorted_list
  132. class Main(object):
  133. def main(self) :
  134. '''''主调函数'''
  135. display_format = displayFormat()
  136. arg_length = len(sys.argv)
  137. if arg_length == 1 :
  138. display_format.error_print()
  139. elif arg_length == 2or arg_length == 3:
  140. infile_name = sys.argv[1]
  141. try :
  142. infile = open(infile_name,'r')
  143. if arg_length == 3 :
  144. lines = int(sys.argv[2])
  145. else :
  146. lines = 0
  147. except IOError,e :
  148. print
  149. print e
  150. display_format.error_print()
  151. except ValueError :
  152. print
  153. print"Please Enter A Volid Number !!"
  154. display_format.error_print()
  155. else :
  156. display_format.error_print()
  157. fileAnalysis_obj = fileAnalysis()
  158. not_true_dict = fileAnalysis_obj.generate_log_report(infile)
  159. log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
  160. total_ip = len(log_report)
  161. if lines :
  162. log_report = log_report[0:lines]
  163. infile.close()
  164. print
  165. total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
  166. total_request_times = fileAnalysis_obj.total_request_times
  167. print'Total IP: %s Total Traffic: %s Total Request Times: %d' \
  168. % (total_ip,total_traffic,total_request_times)
  169. print
  170. display_format.head()
  171. display_format.transverse_line()
  172. for host in log_report :
  173. times = host[1]['times']
  174. times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100
  175. print display_format.formatstring % (host[0],\
  176. display_format.format_size(host[1]['size']),\
  177. times,str(times_percent)[0:5],\
  178. host[1]['200'],host[1]['404'],\
  179. host[1]['500'],host[1]['403'],\
  180. host[1]['302'],host[1]['304'],host[1]['503'])
  181. if (not lines) or total_ip == lines :
  182. display_format.transverse_line()
  183. print display_format.formatstring % (total_ip,total_traffic, \
  184. total_request_times,'100%',\
  185. fileAnalysis_obj.total_200,\
  186. fileAnalysis_obj.total_404,\
  187. fileAnalysis_obj.total_500, \
  188. fileAnalysis_obj.total_403,\
  189. fileAnalysis_obj.total_302, \
  190. fileAnalysis_obj.total_304,\
  191. fileAnalysis_obj.total_503)
  192. display_format.execut_time()
  193. if __name__ == '__main__':
  194. main_obj = Main()
  195. main_obj.main()
linux
相关资讯       Apache日志分析  Nginx日志分析 
本文评论   查看全部评论 (0)
表情: 表情 姓名: 字数

       

评论声明
  • 尊重网上道德,遵守中华人民共和国的各项有关法律法规
  • 承担一切因您的行为而直接或间接导致的民事或刑事法律责任
  • 本站管理人员有权保留或删除其管辖留言中的任意内容
  • 本站有权在网站内转载或引用您的评论
  • 参与本评论即表明您已经阅读并接受上述条款