教你如何在 AlertManager 报警通知中展示监控图表
- 作者: 健力宝
- 来源: 51数据库
- 2022-09-20
| 导读 | 今天换了另外一种方式来实现,直接去绘制渲染报警图表,然后上传到对象存储中保存起来,在钉钉中就可以直接展示了。Promoter 就是这个方案的一个实现。 |
之前用 Python 实现了一个非常简陋的 AlertManager 的钉钉接收器,一直想在钉钉的消息通知中将当前报警图表也展示出来,这样显然对用户来说更加友好。之前想的思路是通过爬虫的方式去 Prometheus 页面将 Graph 图形截图保存下来,该方式理论上确实是可行的,但是这种方式不稳定因素较多,而且会占用大量的资源。
今天换了另外一种方式来实现,直接去绘制渲染报警图表,然后上传到对象存储中保存起来,在钉钉中就可以直接展示了,Promoter 就是这个方案的一个实现,支持在消息通知中展示实时报警图表,效果图如下所示:

目前是将报警数据渲染成图片后上传到 S3 对象存储,所以需要配置一个对象存储(阿里云 OSS 也可以),此外消息通知展示样式支持模板定制。
模板
默认模板位于 template/default.tmpl,可以根据自己需求定制:
{{/* __subject: "[STATUS]" (with ":<number of firing alerts>" appended while the status is "firing"), then the group label values, then, in parentheses, the values of common labels that are not group labels. */}}
{{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
{{/* __alertmanagerURL: the external URL's alerts page, filtered by the current receiver. */}}
{{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}
{{/* default.__text_alert_list: for every alert in the list passed as dot, prints the summary heading, the description, the title of each entry in .Images, and all labels except "severity" and "summary". */}}
{{ define "default.__text_alert_list" }}{{ range . }}
### {{ .Annotations.summary }}
**详情:** {{ .Annotations.description }}
{{ range .Images }}
**条件:** `{{ .Title }}`

{{- end }}
**标签:**
{{ range .Labels.SortedPairs }}{{ if and (ne (.Name) "severity") (ne (.Name) "summary") }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}{{ end }}
{{ end }}{{ end }}
{{/* Default */}}
{{/* default.title: the notification title; delegates to __subject. */}}
{{ define "default.title" }}{{ template "__subject" . }}{{ end }}
{{/* default.content: the notification body. Emits a section for firing alerts and a section for resolved alerts, each only when non-empty and each followed by an @mention for every entry in .AtMobiles. */}}
{{ define "default.content" }}
{{ if gt (len .Alerts.Firing) 0 -}}
#### **{{ .Alerts.Firing | len }} 条报警**
{{ template "default.__text_alert_list" .Alerts.Firing }}
{{ range .AtMobiles }}@{{ . }}{{ end }}
{{- end }}
{{ if gt (len .Alerts.Resolved) 0 -}}
#### **{{ .Alerts.Resolved | len }} 条报警恢复**
{{ template "default.__text_alert_list" .Alerts.Resolved }}
{{ range .AtMobiles }}@{{ . }}{{ end }}
{{- end }}
{{- end }}
部署
默认配置文件如下所示,放置在 /etc/promoter/config.yaml:
debug: true
http_port: 8080
timeout: 5s
prometheus_url: # Prometheus 的地址
metric_resolution: 100
s3:
  access_key:
  secret_key:
  endpoint: oss-cn-beijing.aliyuncs.com
  region: cn-beijing
  bucket:
dingtalk:
  url: https://oapi.dingtalk.com/robot/send?access_token=
  secret: # secret for signature
可以直接使用 Docker 镜像 cnych/promoter:v0.1.1 部署,在 Kubernetes 中部署可以直接参考 deploy/kubernetes/promoter.yaml。
启动完成后在 AlertManager 配置中指定 Webhook 地址即可:
route:
  group_by: ['alertname', 'cluster']
  group_wait: 30s
  group_interval: 2m
  repeat_interval: 1h
  receiver: webhook
receivers:
- name: 'webhook'
  webhook_configs:
  - url: 'http://promoter.kube-mon.svc.cluster.local:8080/webhook' # 配置 promoter 的 webhook 接口
    send_resolved: true
核心原理
该项目采用 golang 实现,Webhook 的实现很简单,这里的核心部分是如何渲染监控图表,核心方式是通过 Prometheus 的 API 接口来获取查询的指标数据:
// Metrics runs a range query against the Prometheus server at the given
// address and returns the resulting matrix.
//
// The query window is [queryTime-duration, queryTime]. The query step is
// computed as duration / step, so step acts as a divisor of the window rather
// than as the interval itself.
//
// An error is returned when step is not positive, when the client cannot be
// created, when the query fails, or when the result is not a matrix.
func Metrics(server, query string, queryTime time.Time, duration, step time.Duration) (promModel.Matrix, error) {
	// duration / step is an integer division on int64-backed durations; a zero
	// divisor would panic, and a negative one would yield a negative step.
	if step <= 0 {
		return nil, fmt.Errorf("invalid step %d: must be positive", int64(step))
	}

	client, err := prometheus.NewClient(prometheus.Config{Address: server})
	if err != nil {
		return nil, fmt.Errorf("failed to create Prometheus client: %w", err)
	}

	api := prometheusApi.NewAPI(client)
	// The second return value (query warnings) is intentionally not surfaced
	// to the caller; only the result and the error are used.
	value, _, err := api.QueryRange(context.Background(), query, prometheusApi.Range{
		Start: queryTime.Add(-duration),
		End:   queryTime,
		Step:  duration / step,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to query Prometheus: %w", err)
	}

	metrics, ok := value.(promModel.Matrix)
	if !ok {
		return nil, fmt.Errorf("unsupported result format: %s", value.Type().String())
	}
	return metrics, nil
}
然后将获取的指标绘制出来,图形绘制是使用 gonum.org/v1/plot 这个包来实现的:
func PlotMetric(metrics promModel.Matrix, level float64, direction string) (io.WriterTo, error) {
p, err := plot.New()
if err != nil {
return nil, fmt.Errorf("failed to create new plot: %v", err)
}
textFont, err := vg.MakeFont("Helvetica", 3*vg.Millimeter)
if err != nil {
return nil, fmt.Errorf("failed to load font: %v", err)
}
evalTextFont, err := vg.MakeFont("Helvetica", 5*vg.Millimeter)
if err != nil {
return nil, fmt.Errorf("failed to load font: %v", err)
}
evalTextStyle := draw.TextStyle{
Color: color.NRGBA{A: 150},
Font: evalTextFont,
XAlign: draw.XRight,
YAlign: draw.YBottom,
}
p.X.Tick.Marker = plot.TimeTicks{Format: "15:04:05"}
p.X.Tick.Label.Font = textFont
p.Y.Tick.Label.Font = textFont
p.Legend.Font = textFont
p.Legend.Top = true
p.Legend.YOffs = 15 * vg.Millimeter
// Color palette for drawing lines
paletteSize := 8
palette, err := brewer.GetPalette(brewer.TypeAny, "Dark2", paletteSize)
if err != nil {
return nil, fmt.Errorf("failed to get color palette: %v", err)
}
colors := palette.Colors()
var lastEvalValue float64
for s, sample := range metrics {
data := make(plotter.XYs, 0)
for _, v := range sample.Values {
fs := v.Value.String()
if fs == "NaN" {
_, err := drawLine(data, colors, s, paletteSize, p, metrics, sample)
if err != nil {
return nil, err
}
data = make(plotter.XYs, 0)
continue
}
f, err := strconv.ParseFloat(fs, 64)
if err != nil {
return nil, fmt.Errorf("sample value not float: %s", v.Value.String())
}
data = append(data, plotter.XY{X: float64(v.Timestamp.Unix()), Y: f})
lastEvalValue = f
}
_, err := drawLine(data, colors, s, paletteSize, p, metrics, sample)
if err != nil {
return nil, err
}
}
var polygonPoints plotter.XYs
if direction == "<" {="" polygonpoints="plotter.XYs{{X:" p.x.min,="" y:="" level},="" {x:="" p.x.max,="" y:="" level},="" {x:="" p.x.max,="" y:="" p.y.min},="" {x:="" p.x.min,="" y:="" p.y.min}}="" }="" else="" {="" polygonpoints="plotter.XYs{{X:" p.x.min,="" y:="" level},="" {x:="" p.x.max,="" y:="" level},="" {x:="" p.x.max,="" y:="" p.y.max},="" {x:="" p.x.min,="" y:="" p.y.max}}="" }="" poly,="" err="" :="plotter.NewPolygon(polygonPoints)" if="" err="" !="nil" {="" return="" nil,="" err="" }="" poly.color="color.NRGBA{R:" 255,="" a:="" 40}="" poly.linestyle.color="color.NRGBA{R:" 0,="" a:="" 0}="" p.add(poly)="" p.add(plotter.newgrid())="" draw="" plot="" in="" canvas="" with="" margin="" margin="" :="6" *="" vg.millimeter="" width="" :="20" *="" vg.centimeter="" height="" :="10" *="" vg.centimeter="" c,="" err="" :="draw.NewFormattedCanvas(width," height,="" "png")="" if="" err="" !="nil" {="" return="" nil,="" fmt.errorf("failed="" to="" create="" canvas:="" %v",="" err)="" }="" cropedcanvas="" :="draw.Crop(draw.New(c)," margin,="" -margin,="" margin,="" -margin)="" p.draw(cropedcanvas)="" draw="" last="" evaluated="" value="" evaltext="" :="fmt.Sprintf(" latest"="" evaluation:="" %.2f",="" lastevalvalue)="" plottercanvas="" :="p.DataCanvas(cropedCanvas)" trx,="" try="" :="p.Transforms(&plotterCanvas)" evalrectangle="" :="evalTextStyle.Rectangle(evalText)" points="" :="[]vg.Point{" {x:="" trx(p.x.max)="" +="" evalrectangle.min.x="" -="" 8*vg.millimeter,="" y:="" try(lastevalvalue)="" +="" evalrectangle.min.y="" -="" vg.millimeter},="" {x:="" trx(p.x.max)="" +="" evalrectangle.min.x="" -="" 8*vg.millimeter,="" y:="" try(lastevalvalue)="" +="" evalrectangle.max.y="" +="" vg.millimeter},="" {x:="" trx(p.x.max)="" +="" evalrectangle.max.x="" -="" 6*vg.millimeter,="" y:="" try(lastevalvalue)="" +="" evalrectangle.max.y="" +="" vg.millimeter},="" {x:="" trx(p.x.max)="" +="" evalrectangle.max.x="" -="" 6*vg.millimeter,="" y:="" try(lastevalvalue)="" +="" 
evalrectangle.min.y="" -="" vg.millimeter},="" }="" plottercanvas.fillpolygon(color.nrgba{r:="" 255,="" g:="" 255,="" b:="" 255,="" a:="" 90},="" points)="" plottercanvas.filltext(evaltextstyle,="" vg.point{x:="" trx(p.x.max)="" -="" 6*vg.millimeter,="" y:="" try(lastevalvalue)},="" evaltext)="" return="" c,="" nil="" }="" func="" drawline(data="" plotter.xys,="" colors="" []color.color,="" s="" int,="" palettesize="" int,="" p="" *plot.plot,="" metrics="" prommodel.matrix,="" sample="" *prommodel.samplestream)="" (*plotter.line,="" error)="" {="" var="" l="" *plotter.line="" var="" err="" error="" if="" len(data)=""> 0 {
l, err = plotter.NewLine(data)
if err != nil {
return &plotter.Line{}, fmt.Errorf("failed to create line: %v", err)
}
l.LineStyle.Width = vg.Points(1)
l.LineStyle.Color = colors[s%paletteSize]
p.Add(l)
if len(metrics) > 1 {
m := labelText.FindStringSubmatch(sample.Metric.String())
if m != nil {
p.Legend.Add(m[1], l)
}
}
}
return l, nil
}">
推荐阅读
