进程监控
https://github.com/ncabatoff/process-exporter
docker
# Run process-exporter in Docker: publish port 9256, mount the host /proc at
# /host/proc and the current directory (which must hold filename.yml) at /config.
# "$(pwd)" is quoted so a working directory containing spaces still mounts.
docker run -d --rm -p 9256:9256 --privileged \
  -v /proc:/host/proc -v "$(pwd)":/config \
  ncabatoff/process-exporter --procfs /host/proc -config.path /config/filename.yml
# Docker Compose definition for the process-exporter container.
# NOTE(review): the top-level `version` key is marked obsolete by the Compose
# Specification; it is kept here for older docker-compose releases — confirm
# whether it is still needed.
version: '3.8'
services:
  process-exporter:
    image: ncabatoff/process-exporter:latest
    container_name: process-exporter
    restart: unless-stopped
    # Same as the `--privileged` flag of the `docker run` form.
    privileged: true
    ports:
      - "9256:9256"
    volumes:
      # Map the host's /proc to /host/proc inside the container (read-only).
      - /proc:/host/proc:ro
      # Map the current directory to /config inside the container
      # (corresponds to the `pwd` mount in the original docker run command).
      # Make sure the current directory contains a config file named
      # filename.yml.
      - ./:/config
    command:
      # Read process information from the mounted host procfs.
      - "--procfs=/host/proc"
      # Path of the process-exporter config file inside the container.
      - "--config.path=/config/filename.yml"
# Run the process-exporter binary from the current directory, loading its
# configuration from process.yml.
./process-exporter --config.path process.yml
# process-exporter configuration: each list item defines one named process
# group. `name` is the group's logical name; `cmdline` lists the patterns
# (regular expressions) that a process's command line must match to be
# counted in that group.
process_names:
  - name: dolphinscheduler-master
    cmdline:
      - MasterServer
  - name: dolphinscheduler-worker
    cmdline:
      - WorkerServer
name:
给这一组进程起一个逻辑名字。
这个名字不会影响匹配,
只会体现在 Prometheus 指标的 groupname 标签(label)里。
cmdline:
匹配规则(最关键)
含义是:
只要某个进程的启动命令行(cmdline)能匹配 MasterServer 这个正则表达式(这里即命令行中包含 MasterServer 这个字符串),
就认为它属于 dolphinscheduler-master 这一组。
大致等价于在 Linux 上执行:
ps -ef | grep MasterServer
# Prometheus scrape job for process-exporter. This item belongs inside the
# `scrape_configs` list of the Prometheus configuration file.
# NOTE(review): `localhost` assumes Prometheus runs on the same host/network
# namespace as the exporter — confirm for containerized Prometheus.
- job_name: process_exporter
  static_configs:
    - targets: ["localhost:9256"]
# Example queries: select the namedprocess_namegroup_num_procs series for the
# master group and for the worker group (one selector per configured group).
namedprocess_namegroup_num_procs{groupname="dolphinscheduler-master"}
namedprocess_namegroup_num_procs{groupname="dolphinscheduler-worker"}
告警规则
# Prometheus alerting rules for the DolphinScheduler process groups.
# Each alert fires when the process count of its group stays below 1 for 10s.
# NOTE(review): `< 1` only matches an existing series. If the exporter has
# never seen a matching process the series may be absent and the alert will
# not fire — consider adding `or absent(...)`; confirm against exporter
# behavior before changing the expression.
groups:
  - name: dolphinscheduler服务监控
    rules:
      - alert: DolphinSchedulerMasterProcessDown
        expr: namedprocess_namegroup_num_procs{groupname="dolphinscheduler-master"} < 1
        for: 10s
        labels:
          severity: warning
        annotations:
          summary: "DolphinScheduler master process not running on {{ $labels.instance }}"
      - alert: DolphinSchedulerWorkerProcessDown
        expr: namedprocess_namegroup_num_procs{groupname="dolphinscheduler-worker"} < 1
        for: 10s
        labels:
          severity: warning
        annotations:
          summary: "DolphinScheduler worker process not running on {{ $labels.instance }}"