By exporting the log for a given time window from monitoring, you can analyze the traffic volume per URL and spot requests that are not being served through the CDN.
The idea is to do a simple real-time traffic analysis of nginx via its access.log; for this, access.log must be written in JSON format.
Change the nginx log format (the script below also groups by server name, so $server_name is included in the format):
log_format main
    '{"remote_addr":"$remote_addr","remote_user":"$remote_user",'
    '"time_local":"$time_iso8601","request":"$request",'
    '"status":"$status","body_bytes_sent":"$body_bytes_sent",'
    '"http_referer":"$http_referer","http_user_agent":"$http_user_agent",'
    '"http_x_forwarded_for":"$http_x_forwarded_for","server_name":"$server_name"}';
access_log /usr/local/nginx/logs/access/blog.log main;
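With this format, each request is logged as a single JSON object per line. A sample entry (all values hypothetical):

{"remote_addr":"203.0.113.7","remote_user":"-","time_local":"2017-09-12T10:15:32+08:00","request":"GET /static/app.js?v=3 HTTP/1.1","status":"200","body_bytes_sent":"51200","http_referer":"-","http_user_agent":"Mozilla/5.0","http_x_forwarded_for":"-","server_name":"blog.example.com"}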
Run the script:
python nginx_netflow.py -f /var/nginx.log -n 100    # tail the last 100 lines
python nginx_netflow.py                             # whole file; above 100 MB, only the last 10,000 lines are read
The script reads the body_bytes_sent field (the size of the response body) from each log entry and accumulates it per client IP, request URL and server name; the core parsing step is sketched below.
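A minimal sketch of that step, assuming the JSON log format above (the sample line and byte value are hypothetical):

import json

line = '{"remote_addr":"203.0.113.7","body_bytes_sent":"51200","request":"GET /a HTTP/1.1"}'
entry = json.loads(line)                # one log line -> one dict
total = int(entry['body_bytes_sent'])   # 51200 bytes sent for this request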
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# description: compute traffic from an nginx access log based on the body_bytes_sent
#              field, grouped by request URL; the requests that concentrate the most
#              traffic are the ones most worth optimizing.
# usage:
#   python nginx_netflow.py -n 100    tail the last 100 lines
#   python nginx_netflow.py           whole file; above 100 MB, tail the last 10,000 lines
#
# todo: given an IP, list its top-N requested URLs; make the access.log path an
#       optional argument; switch the output unit between KB and MB around the 10 MB mark
#
__author__ = "richardzgt"

import os
import sys
import re
import json
import time
import linecache
from optparse import OptionParser

NGINX_FILE = '/var/nginx.log'
FILE_MAX_SIZE = 100      # MB (1024*1024*100 bytes); larger files are only partially read
TOP_N = 5                # print the top 5 entries by traffic
LAST_LINE_NUM = 10000    # number of lines to tail when the file is too large

REQUEST_WITH_Q = re.compile(r'(GET|POST)\s(/\S+)\?')
REQUEST_WITHOUT_Q = re.compile(r'(GET|POST)\s(/\S+)')
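# For illustration (the request strings are hypothetical), the second capture
# group is the path without its query string:
#   REQUEST_WITH_Q.match('GET /api/user?id=1 HTTP/1.1').groups()  -> ('GET', '/api/user')
#   REQUEST_WITHOUT_Q.match('POST /login HTTP/1.1').groups()      -> ('POST', '/login')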
def exeTime(func):
    # Timing decorator: report when the wrapped function starts and ends,
    # and how long it took.
    def newFunc(*args, **kwargs):
        t0 = time.time()
        print("@%s, {%s} start" % (time.strftime("%X", time.localtime()), func.__name__))
        back = func(*args, **kwargs)
        print("@%s, {%s} end" % (time.strftime("%X", time.localtime()), func.__name__))
        print("@%.3fs taken for {%s}" % (time.time() - t0, func.__name__))
        return back
    return newFunc

def args_options():
    parser = OptionParser()
    parser.add_option("-f", "--file", dest="file", action="store",
                      help="path to the access.log file")
    parser.add_option("-n", dest="nums", action="store",
                      help="tail this many lines of the access log, default is [%s]" % LAST_LINE_NUM)
    return parser

def group_by(group_dict, group_str, body_bytes_sent):
    # Accumulate the byte total and hit count for one key
    # (a client IP, a request URL, or a server name).
    if group_str in group_dict:
        group_dict[group_str]['sum_bytes'] += body_bytes_sent
        group_dict[group_str]['count'] += 1
    else:
        group_dict[group_str] = {'sum_bytes': body_bytes_sent, 'count': 1}
    return group_dict
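# For illustration, a populated group dict looks like (values hypothetical):
#   {'203.0.113.7': {'sum_bytes': 5242880, 'count': 12},
#    '198.51.100.2': {'sum_bytes': 1048576, 'count': 3}}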
def order_bytes(group_dict):
    # Sort keys by accumulated bytes, largest first.
    return sorted(group_dict, key=lambda name: group_dict[name]['sum_bytes'], reverse=True)

def print_order(get_group_type, get_order_bytes):
    print("%s" % ('==' * 10))
    for index, each in enumerate(get_order_bytes):
        if index >= TOP_N:
            break
        sum_bytes = get_group_type.get(each).get('sum_bytes')
        count = get_group_type.get(each).get('count')
        print("%s matched %s times, sent \033[1;31m%.1f MB\033[0m" % (each, count, sum_bytes / 1024.0 / 1024))

def request_url(url):
    # Reduce a $request value such as "GET /foo?a=1 HTTP/1.1" to its path "/foo".
    if url.find('?') > 0:
        regex = REQUEST_WITH_Q.match(url)
    else:
        regex = REQUEST_WITHOUT_Q.match(url)
    if regex is not None:
        return regex.groups()[1]
    return 'NONE_REGEX_CONTENT'

# @exeTime
def all_file_handle(file_obj):
    # Aggregate traffic per client IP, per request URL and per server name.
    group_by_ip = {}
    group_by_request = {}
    group_by_server_name = {}
    total_bytes = 0
    start_time = ""
    for eachline in file_obj:
        try:
            data = json.loads(eachline)
            if not start_time:
                start_time = data['time_local']
            body_bytes_sent = int(data['body_bytes_sent'])
            total_bytes += body_bytes_sent
            group_by(group_by_ip, data['remote_addr'], body_bytes_sent)
            group_by(group_by_request, request_url(data['request']), body_bytes_sent)
            group_by(group_by_server_name, data['server_name'], body_bytes_sent)
        except (ValueError, KeyError):
            continue
    for each_group in (group_by_ip, group_by_request, group_by_server_name):
        get_order_bytes = order_bytes(each_group)
        print_order(each_group, get_order_bytes)
    print("first line start: %s" % start_time)
    print("total: %0.1f MB" % (total_bytes / 1024.0 / 1024))

def echo_color(content):
    print("\033[;1m%s\033[0m" % content)

def main(nums=0):
    try:
        file_obj = open(NGINX_FILE, 'r')
    except IOError:
        echo_color("file not found: %s" % NGINX_FILE)
        sys.exit(1)
    if nums != 0:
        handle_file = linecache.getlines(NGINX_FILE)[-nums:]
    elif os.stat(NGINX_FILE).st_size < FILE_MAX_SIZE * 1024 * 1024:
        handle_file = file_obj
    else:
        print('file too big, falling back to the last %s lines' % LAST_LINE_NUM)
        handle_file = linecache.getlines(NGINX_FILE)[-LAST_LINE_NUM:]
    all_file_handle(handle_file)
    file_obj.close()

if __name__ == '__main__':
    parser = args_options()
    (options, args) = parser.parse_args()
    if options.file:
        NGINX_FILE = options.file
    if options.nums:
        nums = options.nums
        if nums.isdigit():
            main(int(nums))
        else:
            print("-n expects a line count, e.g. -n 100")
    else:
        main()
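For comparison, the group_by/order_bytes pair can also be expressed with collections.Counter. A minimal sketch assuming the same JSON log format (the helper name top_talkers and the file path are illustrative, not part of the script above):

import json
from collections import Counter

def top_talkers(lines, field, n=5):
    # Sum body_bytes_sent per value of `field` (e.g. remote_addr), return the top n.
    totals = Counter()
    for line in lines:
        try:
            entry = json.loads(line)
            totals[entry[field]] += int(entry['body_bytes_sent'])
        except (ValueError, KeyError):
            continue
    return totals.most_common(n)

with open('/var/nginx.log') as f:
    for key, nbytes in top_talkers(f, 'remote_addr'):
        print("%s: %.1f MB" % (key, nbytes / 1024.0 / 1024))

Counter.most_common replaces the manual sort, at the cost of dropping the per-key hit count that the script above also tracks.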