fixing gradle build
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
---
# Alertmanager settings shared by every receiver unless a receiver overrides
# them.
global:
  # Time after which an alert that carries no explicit end time is treated as
  # resolved if it has not been updated.
  resolve_timeout: 5m

  # SMTP configuration for email alerts
  smtp_smarthost: 'smtp.example.com:587'
  smtp_from: 'alertmanager@meldestelle.at'
  smtp_auth_username: 'alertmanager@meldestelle.at'
  # Placeholder only - never commit a real credential here. Alertmanager does
  # not expand environment variables in this file; in production supply the
  # secret through `smtp_auth_password_file` (where the deployed Alertmanager
  # release supports it) or render this file from a template at deploy time.
  smtp_auth_password: 'password'
  smtp_require_tls: true
|
||||
|
||||
# Routing tree: every incoming alert enters at this root route.
route:
  # The root route carries no matchers because it is the entry point for all
  # alerts. Alerts that match none of the child routes below are delivered to
  # this default receiver.
  receiver: 'email-notifications'

  # Default grouping: alerts sharing these label values are batched into one
  # notification.
  group_by:
  - 'alertname'
  - 'cluster'
  - 'service'

  # How long to wait before sending the first notification for a new group.
  group_wait: 30s

  # How long to wait before notifying about alerts newly added to a group.
  group_interval: 5m

  # How long to wait before re-sending a notification that was already
  # delivered successfully.
  repeat_interval: 4h

  # Child routes for specific alert categories.
  routes:
  - receiver: 'slack-critical'
    matchers:
    - 'severity="critical"'
    repeat_interval: 1h

  - receiver: 'slack-warnings'
    matchers:
    - 'severity="warning"'
    repeat_interval: 12h
|
||||
|
||||
# Inhibition rules mute a set of alerts while another alert is firing.
inhibit_rules:
# A firing critical alert mutes the matching warning alert.
- source_matchers:
  - 'severity="critical"'
  target_matchers:
  - 'severity="warning"'
  # Inhibition applies only when alertname, cluster and service are all equal
  # on the critical alert and on the warning alert.
  # NOTE(review): requiring an equal alertname means a critical alert can only
  # mute a warning raised under the same alert name - confirm this is intended.
  equal:
  - 'alertname'
  - 'cluster'
  - 'service'
|
||||
|
||||
# Receivers define the notification integrations that routes refer to by name.
receivers:
# Email receiver; used as the default receiver of the root route.
- name: 'email-notifications'
  email_configs:
  - to: 'admin@meldestelle.at'
    # Also send a notification once the alert has resolved.
    send_resolved: true
|
||||
|
||||
# Slack receiver for alerts routed with severity="critical".
- name: 'slack-critical'
  slack_configs:
  # Placeholder webhook - replace before deploying, or keep the secret out of
  # version control by using `api_url_file` instead of `api_url`.
  - api_url: 'https://hooks.slack.com/services/REPLACE_WITH_YOUR_WEBHOOK_URL'
    channel: '#alerts-critical'
    # Also post a message once the alert has resolved.
    send_resolved: true
    # CommonAnnotations only contains annotations whose value is identical on
    # every alert in the group; fall back to the alert name so the title is
    # never empty when the summaries differ.
    title: '{{ or .CommonAnnotations.summary .CommonLabels.alertname }}'
    # Literal block scalar so every field stays on its own line in Slack; a
    # folded scalar joins equally indented lines with spaces.
    text: |-
      {{ range .Alerts }}
      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Severity:* {{ .Labels.severity }}
      *Instance:* {{ .Labels.instance }}
      {{ end }}
|
||||
|
||||
# Slack receiver for alerts routed with severity="warning".
- name: 'slack-warnings'
  slack_configs:
  # Placeholder webhook - replace before deploying, or keep the secret out of
  # version control by using `api_url_file` instead of `api_url`.
  - api_url: 'https://hooks.slack.com/services/REPLACE_WITH_YOUR_WEBHOOK_URL'
    channel: '#alerts-warnings'
    # Also post a message once the alert has resolved.
    send_resolved: true
    # CommonAnnotations only contains annotations whose value is identical on
    # every alert in the group; fall back to the alert name so the title is
    # never empty when the summaries differ.
    title: '{{ or .CommonAnnotations.summary .CommonLabels.alertname }}'
    # Literal block scalar so every field stays on its own line in Slack; a
    # folded scalar joins equally indented lines with spaces.
    text: |-
      {{ range .Alerts }}
      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Severity:* {{ .Labels.severity }}
      *Instance:* {{ .Labels.instance }}
      {{ end }}
|
||||
@@ -0,0 +1,62 @@
|
||||
---
# Prometheus alerting rules for the Meldestelle service. Every rule sets a
# `severity` label of either `warning` or `critical`.
groups:
- name: meldestelle_alerts
  rules:
  # Alert for high memory usage: heap used is above 85 % of heap max.
  - alert: HighMemoryUsage
    expr: >-
      (jvm_memory_used_bytes{area="heap"} / jvm_memory_max_bytes{area="heap"})
      * 100 > 85
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High memory usage ({{ $value }}%)"
      description: "JVM memory usage is above 85% for 5 minutes.\n Instance: {{ $labels.instance }}\n Service: {{ $labels.service }}"

  # Alert for high CPU usage: process_cpu_usage is compared against 0.85.
  - alert: HighCpuUsage
    expr: process_cpu_usage > 0.85
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High CPU usage ({{ $value }})"
      description: "CPU usage is above 85% for 5 minutes.\n Instance: {{ $labels.instance }}\n Service: {{ $labels.service }}"

  # Alert for high error rate: share of 5xx responses over the last 5 minutes.
  # Aggregated by instance and service because a bare sum() drops every label,
  # which would leave the labels printed in the description empty.
  - alert: HighErrorRate
    expr: >-
      sum by (instance, service) (rate(http_server_requests_seconds_count{status=~"5.."}[5m]))
      / sum by (instance, service) (rate(http_server_requests_seconds_count[5m]))
      * 100 > 5
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "High error rate ({{ $value }}%)"
      description: "Error rate is above 5% for 2 minutes.\n Instance: {{ $labels.instance }}\n Service: {{ $labels.service }}"

  # Alert for service unavailability: the scrape target reports up == 0.
  - alert: ServiceUnavailable
    expr: up{job="meldestelle-server"} == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Service unavailable"
      description: "Meldestelle service is down.\n Instance: {{ $labels.instance }}"

  # Alert for slow response time. _sum and _count are cumulative counters, so
  # dividing them directly yields the average since process start; rate() over
  # 5 minutes yields the recent average the description refers to.
  - alert: SlowResponseTime
    expr: >-
      rate(http_server_requests_seconds_sum[5m])
      / rate(http_server_requests_seconds_count[5m]) > 1
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "Slow response time ({{ $value }}s)"
      description: "Average response time is above 1 second for 5 minutes.\n Instance: {{ $labels.instance }}\n Path: {{ $labels.uri }}"

  # Alert for high GC pause time. Uses rate() for the same reason as
  # SlowResponseTime: the raw counters would give a lifetime average.
  - alert: HighGcPauseTime
    expr: >-
      rate(jvm_gc_pause_seconds_sum[5m])
      / rate(jvm_gc_pause_seconds_count[5m]) > 0.5
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High GC pause time ({{ $value }}s)"
      description: "Average GC pause time is above 0.5 seconds for 5 minutes.\n Instance: {{ $labels.instance }}"
|
||||
Reference in New Issue
Block a user