zabbix 告警/恢复 消息优化

Posted by ZhangShun Blog on March 11, 2019

前言

之前一篇博客介绍了 zabbix 的阶段告警,以及 zabbix 告警、恢复消息的次数控制,但仍然存在一个问题:当触发器的事件生成模式设置为“多重”时,触发器每触发一次就会产生一条告警消息和一条对应的恢复消息;告警恢复后,恢复消息也会跟着发送多次,经常导致企业微信流量不够用。而如果把触发器的生成模式设置为“单个”,发送的告警消息又不能即时刷新,只能显示首次触发时的状态。

  • 阶段一:多重告警,多重恢复
  • 阶段二:多重告警,恢复消息只有一条(发送的告警消息不能即时刷新)
  • 阶段三:告警/恢复 消息优化

告警/恢复 消息优化架构

优化架构

  1. 所有产生告警均由zabbix调用脚本推入缓存redis当中
  2. 分析系统每隔规定时间(1分钟)从 redis 中拉取数据,根据预先定义好的一系列规则进行合并、分析或直接丢弃,并将结果存入分析平台数据库,以便历史查询。
  3. 根据预先定义好的规则将报警通过定义好的方式发送给相关人员。

对zabbix进行设置

zabbix Actions

对 Actions 进行特殊设置:Default subject 极为重要,它是识别并收敛告警的标识。每一个触发器每次触发都会生成一个唯一的事件 id(EVENT.ID),subject 由该 id 加后缀组成:告警为 {EVENT.ID}_1,恢复为 {EVENT.ID}_0。

action1

action2

Default subject

{EVENT.ID}_1

Default message

1
Type|PROBLEM#Status|{TRIGGER.STATUS}#triggervalue|{TRIGGER.VALUE}#hostname|{HOST.NAME1}#ipaddress|{IPADDRESS}#hostgroup|{TRIGGER.HOSTGROUP.NAME}#triggernseverity|{TRIGGER.NSEVERITY}#triggername|{TRIGGER.NAME}#triggerkey|{TRIGGER.KEY1}#triggeritems|{ITEM.NAME}#itemvalue|{ITEM.VALUE}#eventid|{EVENT.ID}#Date|{EVENT.DATE}-{EVENT.TIME}#actionid|{ACTION.ID}

Recovery subject

{EVENT.ID}_0

Recovery message

1
Type|RECOVERY#Status|{TRIGGER.STATUS}#triggervalue|{TRIGGER.VALUE}#hostname|{HOST.NAME1}#ipaddress|{IPADDRESS}#hostgroup|{TRIGGER.HOSTGROUP.NAME}#triggernseverity|{TRIGGER.NSEVERITY}#triggername|{TRIGGER.NAME}#triggerkey|{TRIGGER.KEY1}#triggeritems|{ITEM.NAME}#itemvalue|{ITEM.VALUE}#eventid|{EVENT.ID}#Date|{EVENT.DATE}-{EVENT.TIME}#actionid|{ACTION.ID}

告警媒介

这里只需要传递subject 参数就可以了,保存到redis中

告警媒介

police.py

1
2
3
4
5
6
7
#!/usr/bin/env python
#coding:utf-8
import redis
import sys
# The Zabbix media type is configured to pass only the action subject,
# which arrives as the first command-line argument.
event_subject = sys.argv[1]
redis_conn = redis.StrictRedis(host='*.*.*.*', port=6379)
# The subject is stored as both key and value.
redis_conn.set(event_subject, event_subject)

操作部分

1.环境安装

1
2
3
# Install the compiler and Python headers first: the pip packages below
# build C extensions against them.
# NOTE(review): building mysql-python also needs the MySQL client development
# files (mysql_config) — confirm the package name for your distribution.
yum install gcc python-devel
pip install redis
pip install mysql-python
# dbread.py imports Crypto.Cipher.AES, which the pycrypto package provides.
pip install pycrypto

2.脚本部署

  • dbread.py——数据库查询函数:接收 subject 参数({EVENT.ID}_1 或 {EVENT.ID}_0),根据该唯一的 subject 在 zabbix 数据库的 alerts 表内查询,并以字典形式返回告警信息
  • operation.py——操作函数:接收dbread.py返回的告警信息,进行告警合并,告警压缩处理。并返回处理结果
  • weixin.py——发送微信告警通知函数:根据不同的actionid,将处理结果发送至指定运维人员
  • allpolice.py——综合脚本:将上述三个模块(dbread.py、operation.py、weixin.py)整合起来,由 crontab 每 1 分钟执行一次,扫描 redis 取出其中的事件 subject,再依次完成查询、合并和发送

allpolice.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import MySQLdb
import redis
import sys
from dbread import *
from operation import *
from weixin import *
import datetime,time
def _send_all(messagelist):
    """Push each single-entry ``{actionid: text}`` dict to its WeChat app.

    ``find`` selects the credentials and target for the action id.  The
    access token is requested once per (CropID, Secret) pair and reused for
    the rest of the run instead of being fetched again for every message.
    """
    tokens = {}
    for content in messagelist:
        CropID, Secret, totag, agentid = find(content)
        if (CropID, Secret) not in tokens:
            tokens[(CropID, Secret)] = gettoken(CropID, Secret)
        # list() keeps the lookup valid whether values() is a list or a view.
        text = list(content.values())[0]
        senddata(tokens[(CropID, Secret)], text, totag, agentid)


# Connect to redis and read every queued event subject.
r = redis.StrictRedis(host='*.*.*.*', port=6379)

subjectlist = r.keys()
if subjectlist:
    # Remove exactly the keys that were just read, in a single round trip.
    r.delete(*subjectlist)

# Look up the stored alert for every subject.  alerts_eventid() returns None
# when the database lookup fails, so those results are left out instead of
# crashing the merge step below.
originallist = []
for subject in subjectlist:
    alert = alerts_eventid(subject)
    if alert is not None:
        originallist.append(alert)
problem = mergeproblem(originallist)
normal = mergenormal(originallist)

# Send the problem notifications first, then the recovery notifications.
_send_all(compressproblem(problem))
_send_all(compressnormal(normal))

dbread.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python
# -*- coding:utf-8 -*-
import MySQLdb
import datetime,time
import sys
from Crypto.Cipher import AES

# Cipher parameters shared by encrypt() and decrypt().
# NOTE(review): the key and iv are hard-coded in the source file.
key = '0x7jz75nmrjx5k52lcqpybm12b1frbmn'
iv = '1234567812345678'
BS = 16


def pad(s):
    """Append NUL characters so that the result length is a multiple of BS.

    Between 1 and BS characters are always added: an input whose length is
    already a multiple of BS gains one whole extra block.
    """
    missing = BS - len(s) % BS
    return s + '\0' * missing

# Encrypt
def encrypt(text):
    """Return the hex-encoded AES-CBC ciphertext of *text*.

    The text is NUL-padded with pad() and encrypted under the module-level
    key and iv.
    """
    cipher = AES.new(key, AES.MODE_CBC, iv)
    padded = pad(text)
    ciphertext = cipher.encrypt(padded)
    return ciphertext.encode('hex')
# Decrypt
def decrypt(text):
    """Decrypt hex-encoded AES-CBC ciphertext with the module-level key and iv.

    The decrypted data is returned as-is: any NUL padding is still attached
    and has to be removed by the caller.
    """
    cipher = AES.new(key, AES.MODE_CBC, iv)
    raw = text.decode('hex')
    return cipher.decrypt(raw)

def _parse_alert_message(message):
    """Turn ``name|value#name|value#...`` into a ``{name: value}`` dict.

    Every field is split on its first ``|`` only, so a value may itself
    contain ``|``.  A field without any ``|`` makes dict() raise ValueError.
    """
    return dict(field.split('|', 1) for field in message.split('#'))


# Fetch the alert stored for a subject and return its message as a dict.
def alerts_eventid(subject):
    """Look up the alert row whose subject equals *subject*.

    Args:
        subject: the alert subject to search for in the ``alerts`` table.

    Returns:
        A dict built from the ``name|value#name|value`` message of the first
        matching row, or None when the query fails with a MySQLdb error or
        no row matches (a notice is printed in both cases).

    Raises:
        ValueError: if the stored message contains a field without ``|``.
    """
    conn = None
    try:
        conn = MySQLdb.connect(host='*.*.*.*', user='zabbix',
                               passwd=decrypt('2787640721001aab321005fcfed386f3').replace("\x00", ""),
                               db='zabbix', port=3306)
        cursor = conn.cursor()
        try:
            cursor.execute("SET NAMES utf8")
            # Bind the subject as a query parameter instead of formatting it
            # into the SQL text, so quotes in it cannot alter the statement.
            cursor.execute("SELECT * FROM alerts WHERE subject = %s", (subject,))
            data = cursor.fetchall()
        finally:
            cursor.close()
    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        return None
    finally:
        # Release the connection on the error path as well.
        if conn is not None:
            conn.close()

    if not data:
        print("No alert found for subject %s" % subject)
        return None
    # Field 8 of the row is used as the message text — presumably the
    # `message` column of `alerts`; verify against the deployed schema.
    return _parse_alert_message(data[0][8])

operation.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import datetime,time
import sys

reload(sys)
sys.setdefaultencoding('utf-8')
# Problem merge
def mergeproblem(originallist):
    """Return the alerts of *originallist* whose 'triggervalue' is '1'.

    Args:
        originallist: list of alert dicts; entries that are None (a lookup
            that produced no alert) are skipped.

    Returns:
        A new list with the matching dicts, in their original order.

    Raises:
        KeyError: if an alert dict has no 'triggervalue' key.
    """
    return [origina for origina in originallist
            if origina is not None and origina['triggervalue'] == '1']
# Recovery merge
def mergenormal(originallist):
    """Return the alerts of *originallist* whose 'triggervalue' is '0'.

    Args:
        originallist: list of alert dicts; entries that are None (a lookup
            that produced no alert) are skipped.

    Returns:
        A new list with the matching dicts, in their original order.

    Raises:
        KeyError: if an alert dict has no 'triggervalue' key.
    """
    return [origina for origina in originallist
            if origina is not None and origina['triggervalue'] == '0']
	
# Problem compression
def compressproblem(alarminfo):
    """Build one notification text per problem alert.

    Args:
        alarminfo: list of alert dicts holding the keys 'hostname', 'Type',
            'triggername', 'Status', 'Date', 'triggeritems', 'itemvalue',
            'eventid' and 'actionid'.

    Returns:
        A list with one single-entry dict ``{actionid: message}`` per alert,
        in input order.

    Raises:
        KeyError: if an alert dict lacks one of the keys above.
    """
    send_data = []
    for info in alarminfo:
        Host = info['hostname']
        Type = info['Type']
        Service = info['triggername']
        Status = info['Status']
        Date = info['Date']
        Additional = info['triggeritems'] + ':' + info['itemvalue']
        eventid = info['eventid']
        actionid = info['actionid']
        message = '告警◕﹏◕\n'+'通知类型:'+Type+'\n服务:'+Service+'\n主机:'+Host+'\n状态:'+Status+'\n日期时间:'+Date+'\n事件ID:'+eventid+'\n附加信息:\n'+Additional
        # Build the dict directly: joining with '|' and splitting again
        # breaks as soon as any field contains a '|'.
        send_data.append({actionid: message})
    return send_data
	
	
# Recovery compression
def compressnormal(alarminfo):
    """Build the recovery notification texts, one per distinct recovery.

    Recoveries count as duplicates only when action id, host, trigger name
    and item value are all equal; the first of each group is kept.  Keying
    on the item value alone would also discard recoveries of unrelated
    hosts or triggers that merely report the same value.

    Args:
        alarminfo: list of alert dicts holding the keys 'hostname', 'Type',
            'triggername', 'Status', 'triggeritems', 'itemvalue', 'eventid'
            and 'actionid'.

    Returns:
        A list of single-entry dicts ``{actionid: message}``.  The message
        carries the time this function ran, not the alert's own 'Date'.

    Raises:
        KeyError: if an alert dict lacks one of the keys above.
    """
    currenttime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
    seen = set()
    send_data = []
    for info in alarminfo:
        Host = info['hostname']
        Service = info['triggername']
        actionid = info['actionid']
        fingerprint = (actionid, Host, Service, info['itemvalue'])
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        Type = info['Type']
        Status = info['Status']
        Additional = info['triggeritems'] + ':' + info['itemvalue']
        eventid = info['eventid']
        message = '恢复◕‿◕\n'+'通知类型:'+Type+'\n服务:'+Service+'\n主机:'+Host+'\n状态:'+Status+'\n日期时间:'+currenttime+'\n事件ID:'+eventid+'\n附加信息:\n'+Additional
        # Build the dict directly: joining with '|' and splitting again
        # breaks as soon as any field contains a '|'.
        send_data.append({actionid: message})
    return send_data

weixin.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python
# coding: utf-8
import urllib,urllib2
import json
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

# Every new Zabbix action must have its actionid added to one of the lists
# below; different actionids are delivered to different WeChat Work apps.
def find(content):
    """Return the WeChat delivery settings for a notification.

    Args:
        content: single-entry dict ``{actionid: message}``; only the key is
            inspected.

    Returns:
        The tuple ``(CropID, Secret, totag, agentid)`` for the action id.

    Raises:
        ValueError: if *content* is empty or its action id is not listed.
    """
    actionid = next(iter(content), None)
    if actionid in ['17']:
        CropID = 'xxxxxxxxxxxxxxxxxx'
        Secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
        totag = '8'
        agentid = '1000017'
    elif actionid in ['32', '19', '20', '33']:
        CropID = 'xxxxxxxxxxxxxxxxxx'
        Secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
        totag = '7'
        # This assignment belongs to this branch only; applied
        # unconditionally it would override the agent id of action 17.
        agentid = '1000016'
    else:
        raise ValueError('no WeChat route configured for actionid %r' % (actionid,))
    return CropID, Secret, totag, agentid

def gettoken(CropID, Secret):
    """Request an access token from the WeChat Work ``gettoken`` endpoint.

    Args:
        CropID: corp id, sent as the ``corpid`` query parameter.
        Secret: app secret, sent as the ``corpsecret`` query parameter.

    Returns:
        The ``access_token`` value of the JSON response.

    Raises:
        KeyError: if the response has no ``access_token``; the raw response
            body is included in the error to show what the API answered.
    """
    GURL = "https://qyapi.weixin.qq.com/cgi-bin/gettoken?corpid=" + CropID + "&corpsecret=" + Secret
    token_file = urllib2.urlopen(GURL)
    try:
        token_data = token_file.read().decode('utf-8')
    finally:
        # Close the HTTP response even when reading or decoding fails.
        token_file.close()
    token_json = json.loads(token_data)
    if 'access_token' not in token_json:
        raise KeyError('access_token missing from gettoken response: %s' % token_data)
    return token_json['access_token']

def senddata(access_token, content, totag, agentid):
    """POST a text message to the WeChat Work ``message/send`` endpoint.

    Args:
        access_token: token appended to the request URL.
        content: message text, sent as ``text.content``.
        totag: value of the ``totag`` field of the request.
        agentid: value of the ``agentid`` field of the request.

    The decoded JSON reply is printed; nothing is returned.
    """
    PURL = "https://qyapi.weixin.qq.com/cgi-bin/message/send?access_token=" + access_token
    payload = {
        "totag": totag,
        "msgtype": "text",      # message type
        "agentid": agentid,     # id of the app inside the corp account
        "text": {
            "content": content
        },
        "safe": "0"
    }
    # ensure_ascii=False keeps non-ASCII text unescaped in the request body.
    body = json.dumps(payload, ensure_ascii=False)
    request = urllib2.Request(PURL, body)
    reply = urllib2.urlopen(request).read()
    print(str(json.loads(reply)))

# Stand-alone entry point.
# NOTE(review): as written this cannot run. gettoken() is defined with two
# required parameters (CropID, Secret), so the call below raises TypeError
# before anything is sent; the senddata() call likewise passes three
# arguments where four (access_token, content, totag, agentid) are required.
if __name__ == '__main__':
	user = str(sys.argv[1])   # first argument passed in by zabbix
	content = str(sys.argv[3])  # third argument passed in by zabbix
	accesstoken = gettoken()
	senddata(accesstoken,user,content)