手机版
你好,游客 登录 注册
背景:
阅读新闻

Apache/Nginx 访问日志分析脚本

[日期:2013-05-25] 来源:Linux社区  作者:linux5588 [字体: ]

脚本功能:

脚本采用Python2.7编写,用来分析Apache的访问日志 

脚本用法:

  1. 脚本名 Apache的访问日志 [想要显示的行数] 

更新: 

1.第二版:增加 显示指定的行数

2.增加Nginx日志分析脚本 

 

免费下载地址在 http://linux.linuxidc.com/

用户名与密码都是www.linuxidc.com

具体下载目录在 /2013年资料/5月/25日/Apache&Nginx 访问日志分析脚本

 

脚本执行效果如下:

 

脚本内容如下:

nginx访问日志分析脚本

  1. #!/usr/bin/env python
  2. # coding=utf-8
  3. #------------------------------------------------------
  4. # Name: nginx 日志分析脚本
  5. # Purpose: 此脚本只用来分析nginx的访问日志
  6. # Version: 1.0
  7. # Author: LEO
  8. # BLOG: http://www.linuxidc.com
  9. # EMAIL: chanyipiaomiao@163.com
  10. # Created: 2013-05-07
  11. # Modified: 2013-05-07
  12. # Copyright: (c) LEO 2013
  13. #------------------------------------------------------
  14. import sys
  15. import time
  16. #该类是用来打印格式
  17. class displayFormat(object):
  18. def format_size(self,size):
  19. '''''格式化流量单位'''
  20. KB = 1024#KB -> B B是字节
  21. MB = 1048576#MB -> B
  22. GB = 1073741824#GB -> B
  23. TB = 1099511627776#TB -> B
  24. if size >= TB :
  25. size = str(size / TB) + 'T'
  26. elif size < KB :
  27. size = str(size) + 'B'
  28. elif size >= GB and size < TB:
  29. size = str(size / GB) + 'G'
  30. elif size >= MB and size < GB :
  31. size = str(size / MB) + 'M'
  32. else :
  33. size = str(size / KB) + 'K'
  34. return size
  35. #定义字符串格式化
  36. formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'
  37. def transverse_line(self) :
  38. '''''输出横线'''
  39. printself.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)
  40. def head(self):
  41. '''''输出头部信息'''
  42. printself.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')
  43. def error_print(self) :
  44. '''''输出错误信息'''
  45. print
  46. print'Usage : ' + sys.argv[0] + ' NginxLogFilePath [Number]'
  47. print
  48. sys.exit(1)
  49. def execut_time(self):
  50. '''''输出脚本执行的时间'''
  51. print
  52. print"Script Execution Time: %.3f second" % time.clock()
  53. print
  54. #该类是用来生成主机信息的字典
  55. class hostInfo(object):
  56. host_info = ['200','404','500','302','304','503','403','times','size']
  57. def __init__(self,host):
  58. self.host = host = {}.fromkeys(self.host_info,0)
  59. def increment(self,status_times_size,is_size):
  60. '''''该方法是用来给host_info中的各个值加1'''
  61. if status_times_size == 'times':
  62. self.host['times'] += 1
  63. elif is_size:
  64. self.host['size'] = self.host['size'] + status_times_size
  65. else:
  66. self.host[status_times_size] += 1
  67. def get_value(self,value):
  68. '''''该方法是取到各个主机信息中对应的值'''
  69. returnself.host[value]
  70. #该类是用来分析文件
  71. class fileAnalysis(object):
  72. def __init__(self):
  73. '''''初始化一个空字典'''
  74. self.report_dict = {}
  75. self.total_request_times,self.total_traffic,self.total_200, \
  76. self.total_404,self.total_500,self.total_403,self.total_302, \
  77. self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0
  78. def split_eachline_todict(self,line):
  79. '''''分割文件中的每一行,并返回一个字典'''
  80. split_line = line.split()
  81. split_dict = {'remote_host':split_line[0],'status':split_line[8],\
  82. 'bytes_sent':split_line[9],}
  83. return split_dict
  84. def generate_log_report(self,logfile):
  85. '''''读取文件,分析split_eachline_todict方法生成的字典'''
  86. for line in logfile:
  87. try:
  88. line_dict = self.split_eachline_todict(line)
  89. host = line_dict['remote_host']
  90. status = line_dict['status']
  91. except ValueError :
  92. continue
  93. except IndexError :
  94. continue
  95. if host notinself.report_dict :
  96. host_info_obj = hostInfo(host)
  97. self.report_dict[host] = host_info_obj
  98. else :
  99. host_info_obj = self.report_dict[host]
  100. host_info_obj.increment('times',False)
  101. if status in host_info_obj.host_info :
  102. host_info_obj.increment(status,False)
  103. try:
  104. bytes_sent = int(line_dict['bytes_sent'])
  105. except ValueError:
  106. bytes_sent = 0
  107. host_info_obj.increment(bytes_sent,True)
  108. returnself.report_dict
  109. def return_sorted_list(self,true_dict):
  110. '''''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''
  111. for host_key in true_dict :
  112. host_value = true_dict[host_key]
  113. times = host_value.get_value('times')
  114. self.total_request_times = self.total_request_times + times
  115. size = host_value.get_value('size')
  116. self.total_traffic = self.total_traffic + size
  117. o200 = host_value.get_value('200')
  118. o404 = host_value.get_value('404')
  119. o500 = host_value.get_value('500')
  120. o403 = host_value.get_value('403')
  121. o302 = host_value.get_value('302')
  122. o304 = host_value.get_value('304')
  123. o503 = host_value.get_value('503')
  124. true_dict[host_key] = {'200':o200,'404':o404,'500':o500,\
  125. '403':o403,'302':o302,'304':o304, \
  126. '503':o503,'times':times,'size':size}
  127. self.total_200 = self.total_200 + o200
  128. self.total_404 = self.total_404 + o404
  129. self.total_500 = self.total_500 + o500
  130. self.total_302 = self.total_302 + o302
  131. self.total_304 = self.total_304 + o304
  132. self.total_503 = self.total_503 + o503
  133. sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],\
  134. t[1]['size']),reverse=True)
  135. return sorted_list
  136. class Main(object):
  137. def main(self) :
  138. '''''主调函数'''
  139. display_format = displayFormat()
  140. arg_length = len(sys.argv)
  141. if arg_length == 1 :
  142. display_format.error_print()
  143. elif arg_length == 2or arg_length == 3:
  144. infile_name = sys.argv[1]
  145. try :
  146. infile = open(infile_name,'r')
  147. if arg_length == 3 :
  148. lines = int(sys.argv[2])
  149. else :
  150. lines = 0
  151. except IOError,e :
  152. print
  153. print e
  154. display_format.error_print()
  155. except ValueError :
  156. print
  157. print"Please Enter A Volid Number !!"
  158. display_format.error_print()
  159. else :
  160. display_format.error_print()
  161. fileAnalysis_obj = fileAnalysis()
  162. not_true_dict = fileAnalysis_obj.generate_log_report(infile)
  163. log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
  164. total_ip = len(log_report)
  165. if lines :
  166. log_report = log_report[0:lines]
  167. infile.close()
  168. print
  169. total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
  170. total_request_times = fileAnalysis_obj.total_request_times
  171. print'Total IP: %s Total Traffic: %s Total Request Times: %d' \
  172. % (total_ip,total_traffic,total_request_times)
  173. print
  174. display_format.head()
  175. display_format.transverse_line()
  176. for host in log_report :
  177. times = host[1]['times']
  178. times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100
  179. print display_format.formatstring % (host[0],\
  180. display_format.format_size(host[1]['size']),\
  181. times,str(times_percent)[0:5],\
  182. host[1]['200'],host[1]['404'],\
  183. host[1]['500'],host[1]['403'],\
  184. host[1]['302'],host[1]['304'],host[1]['503'])
  185. if (not lines) or total_ip == lines :
  186. display_format.transverse_line()
  187. print display_format.formatstring % (total_ip,total_traffic, \
  188. total_request_times,'100%',\
  189. fileAnalysis_obj.total_200,\
  190. fileAnalysis_obj.total_404,\
  191. fileAnalysis_obj.total_500, \
  192. fileAnalysis_obj.total_403,\
  193. fileAnalysis_obj.total_302, \
  194. fileAnalysis_obj.total_304,\
  195. fileAnalysis_obj.total_503)
  196. display_format.execut_time()
  197. if __name__ == '__main__':
  198. main_obj = Main()
  199. main_obj.main()
linux
相关资讯       Apache日志分析  Nginx日志分析 
本文评论   查看全部评论 (0)
表情: 表情 姓名: 字数

       

评论声明
  • 尊重网上道德,遵守中华人民共和国的各项有关法律法规
  • 承担一切因您的行为而直接或间接导致的民事或刑事法律责任
  • 本站管理人员有权保留或删除其管辖留言中的任意内容
  • 本站有权在网站内转载或引用您的评论
  • 参与本评论即表明您已经阅读并接受上述条款