這期內容當中小編將會給大家帶來有關使用python如何清理磁盤日志,文章內容豐富且以專業的角度為大家分析和敘述,閱讀完這篇文章希望大家可以有所收獲。

一、描述:
以module的方式組織python代碼,在磁盤文件清理上復用性更好
二、達到目標:
清空過期日志文件,清理掉超過自定大小的日志文件
三、源碼
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import commands
import os
import time
import re
import getopt
import sys
def execute_local_shell_cmd(cmd):
    """Run ``cmd`` through the local shell and return its status and output.

    Args:
        cmd: Shell command line to execute.

    Returns:
        A ``(status, lines)`` tuple. ``status`` is the integer reported by
        ``getstatusoutput`` and ``lines`` is the captured output split on
        newlines (``['']`` when the command printed nothing).
    """
    # ``commands`` only exists on Python 2; Python 3 ships the same
    # ``getstatusoutput`` helper in ``subprocess``. Prefer ``commands`` when
    # it is available so Python 2 behavior is left untouched.
    try:
        import commands as shell_runner
    except ImportError:
        import subprocess as shell_runner
    status, output = shell_runner.getstatusoutput(cmd)
    return status, output.split("\n")
def send_alert_mail():
    """Placeholder hook for sending an alert e-mail; currently does nothing."""
    return None
def get_disk_used(disk_name):
    """Read the usage figure that ``df`` reports for a disk.

    Args:
        disk_name: Text used to ``grep`` the matching line(s) out of the
            ``df`` output.

    Returns:
        A ``(status, used)`` tuple: ``status`` is the status of the shell
        pipeline and ``used`` is the fifth ``df`` column of the first
        matching line.
    """
    command = "df | grep %s | awk '{print $5}'" % disk_name
    exit_status, output_lines = execute_local_shell_cmd(command)
    first_line = output_lines[0]
    return exit_status, first_line
def file_modify_in(file_path,time_interval='1d'):
    """Tell whether a file was modified within the given time interval.

    Args:
        file_path: Path of the file to inspect.
        time_interval: Interval text understood by
            ``translate_time_interval_to_second`` (default ``'1d'``).

    Returns:
        ``True`` when the time elapsed since the file's last modification is
        strictly smaller than the interval, ``False`` otherwise.
    """
    now = time.time()
    # os.path.getmtime returns the last-modification time in seconds since
    # the epoch, so the difference is the file's age in seconds.
    age_seconds = now - os.path.getmtime(file_path)
    return age_seconds < translate_time_interval_to_second(time_interval)
def translate_file_size_to_kb(file_size):
    """Convert a human-readable size such as ``'10g'`` into a number of bytes.

    Despite the ``_kb`` suffix (kept so existing callers keep working), the
    returned value is expressed in bytes.

    Args:
        file_size: Size text made of a non-negative decimal number followed
            by a unit: ``g``/``gb``, ``m``/``mb``, ``k``/``kb`` or
            ``b``/``byte``. Matching is case-insensitive; whitespace around
            the number and the unit is ignored.

    Returns:
        The size in bytes as a ``float``.

    Raises:
        IOError: If the text is not exactly a number followed by one of the
            supported units.
    """
    bytes_per_unit = {
        'g': 1024 * 1024 * 1024, 'gb': 1024 * 1024 * 1024,
        'm': 1024 * 1024, 'mb': 1024 * 1024,
        'k': 1024, 'kb': 1024,
        'b': 1, 'byte': 1,
    }
    size_text = str(file_size).strip().lower()
    # Anchor the whole string: checking only the suffix would accept junk
    # between the number and the unit (e.g. "10kg" being read as 10 GB).
    match = re.match(r'(\d+\.?\d*)\s*([a-z]+)$', size_text)
    if match is None or match.group(2) not in bytes_per_unit:
        raise IOError("Input {0} can't translate to byte."
                      "Current support g(gb)/m(mb)/k(kb)/b(byte)".format(size_text))
    return float(match.group(1)) * bytes_per_unit[match.group(2)]
def translate_time_interval_to_second(time_interval):
    """Convert a human-readable interval such as ``'7d'`` into seconds.

    Args:
        time_interval: Interval text made of a non-negative integer followed
            by a unit. Supported units (case-insensitive): ``d``/``day``,
            ``h``/``hour``, ``m``/``min``/``minute``, ``s``/``sec``/``second``
            and their plural forms.

    Returns:
        The interval length in seconds as an ``int``.

    Raises:
        IOError: If the text is not exactly an integer followed by one of the
            supported units.
    """
    seconds_per_unit = {
        'd': 24 * 3600, 'day': 24 * 3600, 'days': 24 * 3600,
        'h': 3600, 'hour': 3600, 'hours': 3600,
        'm': 60, 'min': 60, 'mins': 60, 'minute': 60, 'minutes': 60,
        's': 1, 'sec': 1, 'secs': 1, 'second': 1, 'seconds': 1,
    }
    interval_text = str(time_interval).strip().lower()
    # Match the complete unit instead of testing suffixes: a suffix test reads
    # "7second" as days (it ends with "d") and "2days" as seconds (ends "s").
    match = re.match(r'(\d+)\s*([a-z]+)$', interval_text)
    if match is None or match.group(2) not in seconds_per_unit:
        raise IOError("Input {0} can't translate to second."
                      "Current support d(day)/h(hour)/m(min)/s(sec)".format(interval_text))
    return int(match.group(1)) * seconds_per_unit[match.group(2)]
def probable_current_log_file(file_path,pattern='log',modify_in='1d'):
    """Guess whether ``file_path`` may be a log file that is still in use.

    A file is treated as a probable current log when either condition holds:
      1) it was modified within ``modify_in``;
      2) its path ends with ``pattern``.

    Args:
        file_path: Path of the file to check.
        pattern: Suffix that marks a current log file (default ``'log'``).
        modify_in: Interval text passed to ``file_modify_in`` (default ``'1d'``).

    Returns:
        ``True`` if either condition holds, ``False`` otherwise.
    """
    recently_touched = file_modify_in(file_path, time_interval=modify_in)
    return True if recently_touched else str(file_path).endswith(pattern)
def get_clean_log_list_by_date(target_dir,before_days_remove='7d',pattern="log"):
    """Yield the expired log files found directly inside ``target_dir``.

    A regular file is yielded when all of the following hold:
      * its last modification is older than ``before_days_remove``;
      * its name contains ``pattern``;
      * ``probable_current_log_file`` does not consider it a log that may
        still be in use (checked with the same ``pattern``).

    Args:
        target_dir: Directory to scan (not recursive).
        before_days_remove: Age threshold, as interval text such as ``'7d'``.
        pattern: Substring identifying log files (default ``'log'``).

    Yields:
        The path of each file that qualifies for removal.
    """
    before_seconds_remove = translate_time_interval_to_second(before_days_remove)
    current_time = time.time()
    for candidate_file in os.listdir(target_dir):
        candidate_file_fullpath = os.path.join(target_dir, candidate_file)
        if not os.path.isfile(candidate_file_fullpath):
            continue
        file_age = current_time - os.path.getmtime(candidate_file_fullpath)
        if file_age <= before_seconds_remove or pattern not in candidate_file:
            continue
        # Forward the caller's pattern: without it the "current log" guard
        # always tested for the default 'log' suffix, whatever was requested.
        if not probable_current_log_file(candidate_file_fullpath, pattern=pattern):
            yield candidate_file_fullpath
def get_clean_log_list_by_size(target_dir,file_size_limit='10g',pattern="log"):
    """Yield the oversized log files found directly inside ``target_dir``.

    A regular file is yielded when its name contains ``pattern`` and its size
    is at least ``file_size_limit``. Recently modified files are not filtered
    out here.

    Args:
        target_dir: Directory to scan (not recursive).
        file_size_limit: Size threshold as text such as ``'10g'``.
        pattern: Substring identifying log files (default ``'log'``).

    Yields:
        The path of each file that reaches the size threshold.
    """
    size_threshold = translate_file_size_to_kb(file_size_limit)
    for entry_name in os.listdir(target_dir):
        entry_path = "%s/%s" % (target_dir, entry_name)
        if not os.path.isfile(entry_path):
            continue
        # os.stat exposes the file size in bytes as st_size.
        entry_size = os.stat(entry_path).st_size
        if pattern in entry_name and entry_size >= size_threshold:
            yield entry_path
def remove_file_list(file_list,pattern='log',roll_back=False):
    """Delete every file in ``file_list``, or empty it if it may be in use.

    A file is truncated to zero bytes ("rolled back") instead of deleted when
    ``roll_back`` is true or when ``probable_current_log_file`` reports that
    it may be the log currently being written.

    Args:
        file_list: Iterable of file paths to process.
        pattern: Suffix handed to ``probable_current_log_file``.
        roll_back: Force truncation for every file instead of deletion.

    Raises:
        OSError: If deleting a file fails. A failed truncation is only
            reported on stdout, as the shell-based version never aborted on it.
    """
    for file_item in file_list:
        if roll_back or probable_current_log_file(file_item,pattern=pattern,modify_in='1d'):
            print('roll back file %s' % file_item)
            # Truncate in-process rather than through a shell
            # "cat /dev/null > file": an unquoted path breaks on names that
            # contain spaces or shell metacharacters.
            try:
                with open(file_item, 'w'):
                    pass
            except (IOError, OSError) as err:
                print('roll back file %s failed: %s' % (file_item, err))
        else:
            print('remove file %s' % file_item)
            # os.remove raises OSError when the path is a directory.
            os.remove(file_item)
清理掉超過(guò)日期的日志文件
'''
def remove_files_by_date(target_dir,before_days_remove='7d',pattern='log'):
file_list = get_clean_log_list_by_date(target_dir,before_days_remove,pattern)
remove_file_list(file_list)
def remove_files_by_size(target_dir,file_size_limit='10g',pattern='log'):
    """Clean up the log files in ``target_dir`` that exceed a size limit.

    Args:
        target_dir: Directory to scan (not recursive).
        file_size_limit: Size threshold as text such as ``'10g'``.
        pattern: Substring identifying log files (default ``'log'``).
    """
    file_list = get_clean_log_list_by_size(target_dir, file_size_limit, pattern)
    # Pass the pattern on so files that look like the current log (they end
    # with the requested pattern) are truncated rather than deleted.
    remove_file_list(file_list, pattern=pattern)
def clean_curren_log_file(target_dir,file_size_limit='10g',pattern='log'):
    """Empty the current log files in ``target_dir`` that exceed a size limit.

    Every regular file whose name ends with ``pattern`` and whose size is at
    least ``file_size_limit`` is truncated through
    ``remove_file_list(..., roll_back=True)``; nothing is deleted.

    Args:
        target_dir: Directory to scan (not recursive).
        file_size_limit: Size threshold as text such as ``'10g'``.
        pattern: Suffix identifying current log files (default ``'log'``).
    """
    # The limit does not change between files, so parse it once up front.
    size_threshold = translate_file_size_to_kb(file_size_limit)
    for candidate_file in os.listdir(target_dir):
        candidate_file_fullpath = os.path.join(target_dir, candidate_file)
        if not candidate_file.endswith(pattern):
            continue
        if not os.path.isfile(candidate_file_fullpath):
            continue
        if os.stat(candidate_file_fullpath).st_size >= size_threshold:
            remove_file_list([candidate_file_fullpath], roll_back=True)
def clean_data_release_disk(disk_name, target_dir, disk_used_limit='80%', before_days_remove='7d',
                            file_size_limit='10g', pattern='log'):
    """Free disk space by cleaning logs in increasingly aggressive steps.

    Steps, each later one running only while usage is still above the limit:
      1. always remove log files older than ``before_days_remove``;
      2. remove log files bigger than ``file_size_limit``;
      3. empty the current log files bigger than ``file_size_limit``;
      4. call ``send_alert_mail``.

    Args:
        disk_name: Text identifying the disk in the ``df`` output.
        target_dir: Directory holding the log files.
        disk_used_limit: Usage threshold such as ``'80%'``.
        before_days_remove: Age threshold for step 1, e.g. ``'7d'``.
        file_size_limit: Size threshold for steps 2 and 3, e.g. ``'10g'``.
        pattern: Text identifying log files (default ``'log'``).

    Raises:
        ValueError: If ``disk_used_limit`` is not an integer percentage or
            the disk usage cannot be read.
    """
    disk_used_limit = disk_used_limit.replace('%', '')
    # Parse the limit before deleting anything so a malformed value fails early.
    limit_percent = int(disk_used_limit)

    # Step one: clean up by age.
    print('Step one remove files {0} ago.'.format(before_days_remove))
    remove_files_by_date(target_dir, before_days_remove=before_days_remove, pattern=pattern)

    # If the disk is still too full, clean up by size.
    current_disk_used = _current_disk_used_percent(disk_name)
    if current_disk_used > limit_percent:
        print("Disk {0}'s current used {1}% great than input used limit {2}%,"
              "so we will remove files bigger than {3}".
              format(disk_name, current_disk_used, disk_used_limit, file_size_limit))
        remove_files_by_size(target_dir, file_size_limit=file_size_limit, pattern=pattern)

    # If space still has not been released, empty the logs being written.
    current_disk_used = _current_disk_used_percent(disk_name)
    if current_disk_used > limit_percent:
        print("Disk {0}'s current used {1}% great than input used limit {2}%,"
              "so we will roll back current log file".
              format(disk_name, current_disk_used, disk_used_limit))
        clean_curren_log_file(target_dir, file_size_limit=file_size_limit, pattern=pattern)

    # Still above the limit: raise an alert.
    if _current_disk_used_percent(disk_name) > limit_percent:
        send_alert_mail()


def _current_disk_used_percent(disk_name):
    """Return the usage reported by ``get_disk_used`` as an integer percent."""
    used_text = get_disk_used(disk_name)[1].replace('%', '').strip()
    if not used_text:
        # Fail with an explicit message instead of the opaque int('') error.
        raise ValueError("No df entry matched disk {0!r}; "
                         "cannot read its usage".format(disk_name))
    return int(used_text)
def usage():
    """Print the command-line synopsis of the cleaning script to stdout."""
    print('clean.py -d <target_disk> -r <target_directory> -u <diskUsedLimit(default 80%)> '
          '-f <fileSizeLimit(default 10gb,gb/mb/kb)> -p <filePattern(default log)> '
          '-t <beforeDaysRemove(default 7d,d)> ')
if __name__ == "__main__":
    # Script entry point: parse the command line, validate the size and
    # interval arguments, then run the staged clean-up.
    # Defaults; because all six options are required below, each of them is
    # overridden when the options are all distinct.
    target_disk_input = '/data0'
    target_dir_input = '/data0/hadoop2/logs'
    disk_used_limit_input = '80%'
    file_size_limit_input = '10g'
    pattern_input = 'log'
    before_days_remove_input = '7d'
    try:
        # getopt returns the parsed (option, value) pairs plus the leftover
        # positional arguments, which this script does not use.
        opts, args = getopt.getopt(
            sys.argv[1:], 'hd:r:u:f:p:t:',
            ['help', 'disk=', 'directory=', 'diskUsedLimit=', 'fileSizeLimit=',
             'filePattern=', 'beforeDaysRemove='])
    except getopt.GetoptError as err:
        print(err)
        usage()
        sys.exit(2)
    # Answer a help request before enforcing the option count, otherwise
    # "-h" on its own would be reported as a usage error.
    if any(opt in ('-h', '--help') for opt, _ in opts):
        usage()
        sys.exit()
    if len(opts) < 6:
        usage()
        sys.exit(2)
    try:
        for opt, arg in opts:
            if opt in ("-d", "--disk"):
                # Drop only the surrounding slashes: removing every slash
                # turned "/dev/sda1" into "devsda1", which df never prints.
                target_disk_input = arg.strip('/')
            elif opt in ("-r", "--directory"):
                target_dir_input = arg
            elif opt in ("-u", "--diskUsedLimit"):
                disk_used_limit_input = arg
            elif opt in ("-f", "--fileSizeLimit"):
                file_size_limit_input = arg
                # Validate now so a bad value fails before anything is deleted.
                translate_file_size_to_kb(file_size_limit_input)
            elif opt in ("-p", "--filePattern"):
                pattern_input = arg
            elif opt in ("-t", "--beforeDaysRemove"):
                before_days_remove_input = arg
                # Validate now so a bad value fails before anything is deleted.
                translate_time_interval_to_second(before_days_remove_input)
    except IOError as err:
        # The translate helpers raise IOError for malformed size/interval text.
        print(err)
        usage()
        sys.exit(2)
    print("{0} Start clean job.target_disk:{1},target_directory:{2},disk_used_limit:{3},"
          "file_size_limit:{4},pattern:{5},before_days_remove:{6}".format(
              time.ctime(time.time()),
              target_disk_input, target_dir_input,
              disk_used_limit_input, file_size_limit_input,
              pattern_input, before_days_remove_input))
    clean_data_release_disk(target_disk_input, target_dir_input,
                            disk_used_limit=disk_used_limit_input,
                            file_size_limit=file_size_limit_input,
                            pattern=pattern_input,
                            before_days_remove=before_days_remove_input)
分享名稱:使用python如何清理磁盤日志-創新互聯
文章來(lái)源:http://www.chinadenli.net/article22/dgedcc.html
成都網站建設公司_創新互聯,為您提供網頁設計公司、電子商務、網站營銷、自適應網站、App設計、標簽優化
聲明:本網站發布的內容(圖片、視頻和文字)以用戶投稿、用戶轉載內容為主,如果涉及侵權請盡快告知,我們將會在第一時間刪除。文章觀點不代表本網站立場,如需處理請聯系客服。電話:028-86922220;郵箱:631063699@qq.com。內容未經允許不得轉載,或轉載時需注明來源: 創新互聯
猜你還喜歡下面的內容