refactoring: Env-Dateien und Docker-Dateien

This commit is contained in:
2025-11-20 22:03:37 +01:00
parent 695e28c4c6
commit 69032cb6e7
66 changed files with 409 additions and 924 deletions
@@ -0,0 +1,82 @@
global:
  # Time after which an alert that is no longer refreshed counts as resolved.
  resolve_timeout: 5m

  # Outgoing mail settings shared by all email receivers. The values are
  # shell-style ${VAR:-default} placeholders.
  # NOTE(review): Alertmanager does not expand environment variables in its
  # configuration on its own - confirm that this file is rendered (for example
  # by an entrypoint script) before Alertmanager loads it.
  smtp_require_tls: true
  smtp_smarthost: '${SMTP_SMARTHOST:-smtp.example.com:587}'
  smtp_from: '${SMTP_FROM:-alertmanager@meldestelle.at}'
  smtp_auth_username: '${SMTP_AUTH_USERNAME:-alertmanager@meldestelle.at}'
  # No default here: the password has to be supplied from outside.
  smtp_auth_password: '${SMTP_AUTH_PASSWORD}'
# Routing tree: every incoming alert enters at this root route.
route:
  # Fallback receiver for alerts that none of the child routes below claim.
  # The root route itself carries no matchers.
  receiver: 'email-notifications'
  # Labels whose values define a notification group.
  group_by:
    - 'alertname'
    - 'cluster'
    - 'service'
  # Delay before the first notification for a new group is sent.
  group_wait: 30s
  # Delay before notifying about alerts that were added to an existing group.
  group_interval: 5m
  # Pause before a successfully delivered notification is repeated.
  repeat_interval: 4h
  # Severity-specific child routes; each overrides the repeat interval.
  routes:
    - receiver: 'slack-critical'
      repeat_interval: 1h
      matchers:
        - severity="critical"
    - receiver: 'slack-warnings'
      repeat_interval: 12h
      matchers:
        - severity="warning"
# Inhibition rules mute a set of alerts while another alert is firing.
inhibit_rules:
  # A firing critical alert silences the corresponding warning alert.
  - source_matchers:
      - severity="critical"
    target_matchers:
      - severity="warning"
    # Only applies when source and target agree on all of these labels.
    equal:
      - 'alertname'
      - 'cluster'
      - 'service'
# Receivers define the notification integrations referenced by the routes.
receivers:
  # Default receiver: email to the admin mailbox.
  - name: 'email-notifications'
    email_configs:
      - to: 'admin@meldestelle.at'
        send_resolved: true

  # Slack channel for alerts with severity="critical".
  - name: 'slack-critical'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_URL_CRITICAL}'
        channel: '${SLACK_CHANNEL_CRITICAL:-#alerts-critical}'
        send_resolved: true
        title: '{{ .CommonAnnotations.summary }}'
        # Literal block scalar (|-): line breaks are kept, so every field is
        # rendered on its own line. A folded scalar (>-) would join the lines
        # with spaces and produce one run-on line per notification.
        text: |-
          {{ range .Alerts }}
          *Alert:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Severity:* {{ .Labels.severity }}
          *Instance:* {{ .Labels.instance }}
          {{ end }}

  # Slack channel for alerts with severity="warning".
  - name: 'slack-warnings'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_URL_WARNINGS}'
        channel: '${SLACK_CHANNEL_WARNINGS:-#alerts-warnings}'
        send_resolved: true
        title: '{{ .CommonAnnotations.summary }}'
        # Literal block scalar (|-) for the same reason as above: keep one
        # field per line in the Slack message.
        text: |-
          {{ range .Alerts }}
          *Alert:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Severity:* {{ .Labels.severity }}
          *Instance:* {{ .Labels.instance }}
          {{ end }}
+13
View File
@@ -0,0 +1,13 @@
---
## Default Elasticsearch configuration (development)
cluster.name: "meldestelle-elk"
# Listen on all network interfaces.
# NOTE(review): together with the disabled security below this exposes an
# unauthenticated node - keep this file to development setups only.
network.host: 0.0.0.0
# Run as a single-node cluster
discovery.type: single-node
# X-Pack security disabled for development
xpack.security.enabled: false
# Enable monitoring data collection
xpack.monitoring.collection.enabled: true
+51
View File
@@ -0,0 +1,51 @@
# Event sources of the pipeline.
input {
  # JSON-lines stream on TCP port 5000 (target of the logback appender).
  tcp {
    codec => json_lines
    port  => 5000
  }

  # Server log files on disk.
  file {
    path           => "/var/log/meldestelle/*.log"
    # The sincedb is pointed at /dev/null, so read positions are not kept and
    # the files are read from the beginning again.
    sincedb_path   => "/dev/null"
    start_position => "beginning"
  }
}
# Event enrichment and parsing.
filter {
  # Events typed "syslog": split the line into its parts, record receive
  # metadata, and take the event time from the syslog timestamp.
  if [type] == "syslog" {
    grok {
      match => { "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
      add_field => {
        "received_at"   => "%{@timestamp}"
        "received_from" => "%{host}"
      }
    }
    date {
      match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
    }
  }

  # A message that looks like a single JSON object is expanded into fields.
  if [message] =~ /^\{.*\}$/ {
    json {
      source => "message"
    }
  }

  # Tag every event with the application name.
  mutate {
    add_field => { "application" => "meldestelle" }
  }
}
# Event destinations.
output {
  # Daily index in Elasticsearch.
  elasticsearch {
    index => "meldestelle-logs-%{+YYYY.MM.dd}"
    hosts => ["elasticsearch:9200"]
  }

  # Echo every event to stdout as a debugging aid.
  stdout {
    codec => rubydebug
  }
}
@@ -0,0 +1,389 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Meldestelle Application Overview Dashboard - Key metrics and health indicators",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "rate(http_server_requests_seconds_count{application=\"meldestelle\"}[5m])",
"interval": "",
"legendFormat": "{{method}} {{uri}}",
"refId": "A"
}
],
"title": "HTTP Request Rate",
"type": "timeseries"
},
{
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "fieldConfig": {
    "defaults": {
      "color": { "mode": "thresholds" },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "red", "value": null },
          { "color": "green", "value": 1 }
        ]
      }
    },
    "overrides": []
  },
  "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
  "id": 2,
  "options": {
    "colorMode": "background",
    "graphMode": "none",
    "justifyMode": "auto",
    "orientation": "auto",
    "reduceOptions": {
      "values": false,
      "calcs": ["lastNotNull"],
      "fields": ""
    },
    "textMode": "auto"
  },
  "pluginVersion": "8.5.0",
  "targets": [
    {
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "expr": "up{application=\"meldestelle\"}",
      "interval": "",
      "legendFormat": "{{instance}}",
      "refId": "A"
    }
  ],
  "title": "Application Status",
  "type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "histogram_quantile(0.95, rate(http_server_requests_seconds_bucket{application=\"meldestelle\"}[5m])) * 1000",
"interval": "",
"legendFormat": "95th percentile",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "histogram_quantile(0.50, rate(http_server_requests_seconds_bucket{application=\"meldestelle\"}[5m])) * 1000",
"interval": "",
"legendFormat": "50th percentile",
"refId": "B"
}
],
"title": "HTTP Response Times",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "expr": "sum(rate(http_server_requests_seconds_count{application=\"meldestelle\",status=~\"[45].*\"}[5m])) / sum(rate(http_server_requests_seconds_count{application=\"meldestelle\"}[5m])) * 100",
  "interval": "",
  "legendFormat": "Error Rate",
  "refId": "A"
}
],
"title": "Error Rate",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"meldestelle",
"application",
"overview"
],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Meldestelle - Application Overview",
"uid": "meldestelle-app-overview",
"version": 1,
"weekStart": ""
}
@@ -0,0 +1,599 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Infrastructure Components Dashboard - Monitoring of PostgreSQL, Redis, Kafka, and other supporting services",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
  "datasource": { "type": "prometheus", "uid": "prometheus" },
  "fieldConfig": {
    "defaults": {
      "color": { "mode": "thresholds" },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "red", "value": null },
          { "color": "green", "value": 1 }
        ]
      }
    },
    "overrides": []
  },
  "gridPos": { "h": 4, "w": 24, "x": 0, "y": 0 },
  "id": 1,
  "options": {
    "colorMode": "background",
    "graphMode": "none",
    "justifyMode": "auto",
    "orientation": "horizontal",
    "reduceOptions": {
      "values": false,
      "calcs": ["lastNotNull"],
      "fields": ""
    },
    "textMode": "auto"
  },
  "pluginVersion": "8.5.0",
  "targets": [
    {
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "expr": "up{job=\"postgres\"}",
      "interval": "",
      "legendFormat": "PostgreSQL",
      "refId": "A"
    },
    {
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "expr": "up{job=\"redis\"}",
      "interval": "",
      "legendFormat": "Redis",
      "refId": "B"
    },
    {
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "expr": "up{job=\"kafka\"}",
      "interval": "",
      "legendFormat": "Kafka",
      "refId": "C"
    },
    {
      "datasource": { "type": "prometheus", "uid": "prometheus" },
      "expr": "up{job=\"keycloak\"}",
      "interval": "",
      "legendFormat": "Keycloak",
      "refId": "D"
    }
  ],
  "title": "Infrastructure Services Status",
  "type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 4
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "100 - (avg(rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
"interval": "",
"legendFormat": "CPU Usage",
"refId": "A"
}
],
"title": "System CPU Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 4
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes",
"interval": "",
"legendFormat": "Memory Used",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "node_memory_MemTotal_bytes",
"interval": "",
"legendFormat": "Memory Total",
"refId": "B"
}
],
"title": "System Memory Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 12
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "pg_stat_database_numbackends{job=\"postgres\"}",
"interval": "",
"legendFormat": "{{datname}}",
"refId": "A"
}
],
"title": "PostgreSQL Active Connections",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 12
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "redis_connected_clients{job=\"redis\"}",
"interval": "",
"legendFormat": "Connected Clients",
"refId": "A"
}
],
"title": "Redis Connected Clients",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"vis": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 12
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "kafka_server_brokertopicmetrics_messagesin_total{job=\"kafka\"}",
"interval": "",
"legendFormat": "{{topic}}",
"refId": "A"
}
],
"title": "Kafka Messages In",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": [
"meldestelle",
"infrastructure",
"postgres",
"redis",
"kafka"
],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Meldestelle - Infrastructure Components",
"uid": "meldestelle-infrastructure",
"version": 1,
"weekStart": ""
}
@@ -0,0 +1,659 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_memory_used_bytes{area=\"heap\"}",
"legendFormat": "Used Heap",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_memory_committed_bytes{area=\"heap\"}",
"hide": false,
"legendFormat": "Committed Heap",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_memory_max_bytes{area=\"heap\"}",
"hide": false,
"legendFormat": "Max Heap",
"range": true,
"refId": "C"
}
],
"title": "JVM Heap Memory",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_memory_used_bytes{area=\"nonheap\"}",
"legendFormat": "Used Non-Heap",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_memory_committed_bytes{area=\"nonheap\"}",
"hide": false,
"legendFormat": "Committed Non-Heap",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_memory_max_bytes{area=\"nonheap\"}",
"hide": false,
"legendFormat": "Max Non-Heap",
"range": true,
"refId": "C"
}
],
"title": "JVM Non-Heap Memory",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_threads_live_threads",
"legendFormat": "Live Threads",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_threads_daemon_threads",
"hide": false,
"legendFormat": "Daemon Threads",
"range": true,
"refId": "B"
}
],
"title": "JVM Threads",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "jvm_gc_pause_seconds_sum / jvm_gc_pause_seconds_count",
"legendFormat": "GC Pause Time",
"range": true,
"refId": "A"
}
],
"title": "JVM GC Pause Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "rate(http_server_requests_seconds_count[1m])",
"legendFormat": "{{method}} {{uri}} {{status}}",
"range": true,
"refId": "A"
}
],
"title": "HTTP Request Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "http_server_requests_seconds_sum / http_server_requests_seconds_count",
"legendFormat": "{{method}} {{uri}} {{status}}",
"range": true,
"refId": "A"
}
],
"title": "HTTP Request Duration",
"type": "timeseries"
}
],
"refresh": "5s",
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Meldestelle JVM Metrics",
"uid": "meldestelle-jvm",
"version": 1,
"weekStart": ""
}
@@ -0,0 +1,11 @@
apiVersion: 1

providers:
  # File-based provider: dashboards are read from the directory in
  # options.path and placed in the "Meldestelle" folder of organisation 1.
  - name: 'Meldestelle Dashboards'
    type: file
    orgId: 1
    folder: 'Meldestelle'
    editable: true
    disableDeletion: false
    options:
      path: /var/lib/grafana/dashboards
@@ -0,0 +1,10 @@
apiVersion: 1

datasources:
  # Default datasource; Grafana proxies the queries to the Prometheus server.
  # NOTE(review): no explicit `uid` is set here, while dashboards address
  # their datasource by uid - confirm that the uid Grafana assigns matches
  # what the provisioned dashboards expect.
  - name: Prometheus
    type: prometheus
    url: http://prometheus:9090
    access: proxy
    isDefault: true
    editable: false
    version: 1
+185
View File
@@ -0,0 +1,185 @@
# ===================================================================
# Prometheus Development Configuration
# Enhanced monitoring for Meldestelle development environment
# ===================================================================
global:
  # Short intervals for development: targets are scraped and rules are
  # evaluated every 15 seconds.
  evaluation_interval: 15s
  scrape_interval: 15s
  # Labels identifying this Prometheus instance.
  external_labels:
    environment: 'development'
    cluster: 'meldestelle-dev'
# Rule files loaded by this instance (development-friendly alerting rules).
rule_files:
- '/etc/prometheus/rules.yml'
# Scrape targets of the development environment.
scrape_configs:
# -------------------------------------------------------------------
# Infrastructure services
# -------------------------------------------------------------------

# Prometheus scraping its own metrics endpoint.
- job_name: 'prometheus'
  metrics_path: '/metrics'
  scrape_interval: 15s
  static_configs:
  - targets:
    - 'localhost:9090'
# -------------------------------------------------------------------
# Application services (Spring Boot), scraped at /actuator/prometheus
# -------------------------------------------------------------------

# API Gateway: scraped more often than the global default.
- job_name: 'api-gateway'
  metrics_path: '/actuator/prometheus'
  params:
    format:
    - 'prometheus'
  scrape_interval: 10s
  scrape_timeout: 5s
  static_configs:
  - targets:
    - 'api-gateway:8081'

# Auth Server
- job_name: 'auth-server'
  metrics_path: '/actuator/prometheus'
  scrape_interval: 15s
  scrape_timeout: 5s
  static_configs:
  - targets:
    - 'auth-server:8081'

# Monitoring Server (self-monitoring)
- job_name: 'monitoring-server'
  metrics_path: '/actuator/prometheus'
  scrape_interval: 15s
  scrape_timeout: 5s
  static_configs:
  - targets:
    - 'monitoring-server:8083'

# Ping Service: short interval and timeout, used for testing.
- job_name: 'ping-service'
  metrics_path: '/actuator/prometheus'
  scrape_interval: 10s
  scrape_timeout: 3s
  static_configs:
  - targets:
    - 'ping-service:8082'
# -------------------------------------------------------------------
# Infrastructure monitoring (exporters are optional in development)
# -------------------------------------------------------------------

# PostgreSQL exporter, if deployed.
- job_name: 'postgres'
  scrape_interval: 30s
  scrape_timeout: 10s
  static_configs:
  - targets:
    - 'postgres-exporter:9187'

# Redis exporter, if deployed.
- job_name: 'redis'
  scrape_interval: 30s
  scrape_timeout: 10s
  static_configs:
  - targets:
    - 'redis-exporter:9121'
# -------------------------------------------------------------------
# Container and host metrics
# -------------------------------------------------------------------

# Docker container metrics from cAdvisor, if deployed.
- job_name: 'cadvisor'
  scrape_interval: 30s
  scrape_timeout: 10s
  static_configs:
  - targets:
    - 'cadvisor:8080'

# Host metrics from Node Exporter, if deployed.
- job_name: 'node-exporter'
  scrape_interval: 30s
  scrape_timeout: 10s
  static_configs:
  - targets:
    - 'node-exporter:9100'
# ===================================================================
# Service Discovery (Consul Integration)
# ===================================================================
# Consul service discovery for dynamically registered services.
- job_name: 'consul-services'
  consul_sd_configs:
  - server: 'consul:8500'
    # Empty list: no restriction to specific service names.
    services: []
  relabel_configs:
  # Only scrape services whose metadata carries prometheus_scrape=true.
  - source_labels: [__meta_consul_service_metadata_prometheus_scrape]
    action: keep
    regex: 'true'
  # Use the Consul service name as the job label.
  - source_labels: [__meta_consul_service]
    target_label: job
  # Use the metrics path from the service metadata when one is given.
  - source_labels: [__meta_consul_service_metadata_prometheus_path]
    target_label: __metrics_path__
    regex: '(.+)'
  # Replace the port with the one from the service metadata when given.
  # The regex is single-quoted on purpose: inside a double-quoted YAML
  # scalar "\d" is an invalid escape sequence and the file fails to parse.
  - source_labels: [__address__, __meta_consul_service_metadata_prometheus_port]
    target_label: __address__
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
# ===================================================================
# Development-Specific Configurations
# ===================================================================
# Health check endpoints monitoring - DISABLED.
# /actuator/health answers with a JSON document, not the Prometheus
# exposition format, so every scrape of this job fails and its targets are
# permanently reported as down. Service availability is already covered by
# `up` on the per-service jobs; to probe the health endpoint itself, use an
# HTTP prober such as blackbox_exporter instead of a direct scrape.
# - job_name: 'health-checks'
#   metrics_path: '/actuator/health'
#   scrape_interval: 30s
#   scrape_timeout: 5s
#   static_configs:
#   - targets:
#     - 'api-gateway:8081'
#     - 'auth-server:8081'
#     - 'monitoring-server:8083'
#     - 'ping-service:8082'
# JVM metrics (additional detail for development).
# Scrapes the same actuator endpoints as the per-service jobs, but keeps only
# the jvm_*, process_* and system_* series.
- job_name: 'jvm-metrics'
  metrics_path: '/actuator/prometheus'
  scrape_interval: 30s
  static_configs:
  - targets:
    - 'api-gateway:8081'
    - 'auth-server:8081'
    - 'monitoring-server:8083'
    - 'ping-service:8082'
  # The filter is applied after the scrape. The previous `match[]` URL
  # parameter is a federation-endpoint parameter and does not filter the
  # output of /actuator/prometheus, so the job ingested every series.
  metric_relabel_configs:
  - source_labels: [__name__]
    regex: '(jvm|process|system)_.*'
    action: keep
# ===================================================================
# Alerting Configuration (Development-friendly)
# ===================================================================
alerting:
  alertmanagers:
  - static_configs:
    # Alertmanager is not typically used in development, so the target list
    # is an explicit empty list rather than a bare (null) key.
    # To enable it, replace [] with ['alertmanager:9093'].
    - targets: []
# ===================================================================
# Remote Write Configuration (for development data persistence)
# ===================================================================
# Uncomment if you want to send metrics to external storage
# remote_write:
# - url: "http://prometheus-remote-write:8080/api/v1/write"
# queue_config:
# max_samples_per_send: 1000
# max_shards: 200
# capacity: 2500
+123
View File
@@ -0,0 +1,123 @@
# Prometheus Production Configuration
# =============================================================================
# This configuration provides production-ready monitoring setup with
# security, performance optimizations, and comprehensive service discovery
# =============================================================================
global:
  # Default cadence for scraping targets and for evaluating rules; jobs
  # below override the scrape interval individually.
  evaluation_interval: 15s
  scrape_interval: 15s
  # Labels identifying this Prometheus server and its environment.
  external_labels:
    environment: "production"
    monitor: "meldestelle-prod"
# Alertmanager configuration
# Alerts are delivered to a single, statically configured Alertmanager.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["alertmanager:9093"]
# Rule files are loaded once and then evaluated periodically according to
# the global 'evaluation_interval'.
rule_files:
  - 'alert_rules.yml'
  - 'recording_rules.yml'
# Scrape configuration
scrape_configs:
# Prometheus itself
# Self-scrape of the local server, at a tighter 5s interval than the
# global default.
- job_name: "prometheus"
  metrics_path: /metrics
  scrape_interval: 5s
  static_configs:
    - targets:
        - "localhost:9090"
# Application metrics
# Scrapes the application's /actuator/prometheus endpoint with HTTP basic auth.
- job_name: "meldestelle-api"
  metrics_path: /actuator/prometheus
  scrape_interval: 10s
  # NOTE(review): the password is a placeholder that has to be replaced
  # before deployment. Consider `password_file` so the real secret never
  # lands in version control.
  basic_auth:
    username: "admin"
    password: "CHANGE_ME_METRICS_PASSWORD"
  static_configs:
    # NOTE(review): host.docker.internal presumably resolves to the Docker
    # host - confirm this is the intended target for production.
    - targets:
        - "host.docker.internal:8081"
# PostgreSQL metrics (using postgres_exporter)
- job_name: "postgres"
  scrape_interval: 30s
  static_configs:
    - targets:
        - "postgres-exporter:9187"
# Redis metrics (using redis_exporter)
- job_name: "redis"
  scrape_interval: 30s
  static_configs:
    - targets:
        - "redis-exporter:9121"
# Kafka metrics (using kafka_exporter)
- job_name: "kafka"
  scrape_interval: 30s
  static_configs:
    - targets:
        - "kafka-exporter:9308"
# Zookeeper metrics (using zookeeper_exporter)
- job_name: "zookeeper"
  scrape_interval: 30s
  static_configs:
    - targets:
        - "zookeeper-exporter:9141"
# Keycloak metrics
# Scraped over HTTPS from the master realm's metrics path.
- job_name: "keycloak"
  scheme: https
  metrics_path: /auth/realms/master/metrics
  scrape_interval: 30s
  tls_config:
    # NOTE(review): certificate verification is switched off for this
    # target; for production consider supplying `ca_file` instead.
    insecure_skip_verify: true
  static_configs:
    - targets:
        - "keycloak:8443"
# Nginx metrics (using nginx-prometheus-exporter)
- job_name: "nginx"
  scrape_interval: 30s
  static_configs:
    - targets:
        - "nginx-exporter:9113"
# Node exporter for system metrics
- job_name: "node"
  scrape_interval: 30s
  static_configs:
    - targets:
        - "node-exporter:9100"
# cAdvisor for container metrics
- job_name: "cadvisor"
  scrape_interval: 30s
  static_configs:
    - targets:
        - "cadvisor:8080"
# Grafana metrics
- job_name: "grafana"
  metrics_path: /metrics
  scrape_interval: 30s
  static_configs:
    - targets:
        - "grafana:3000"
# Zipkin metrics
- job_name: "zipkin"
  metrics_path: /actuator/prometheus
  scrape_interval: 30s
  static_configs:
    - targets:
        - "zipkin:9411"
# Remote write configuration (for long-term storage)
# remote_write:
# - url: "https://your-remote-storage/api/v1/write"
# basic_auth:
# username: "your-username"
# password: "your-password"
# Storage configuration
# TSDB retention and WAL compression cannot be set with `retention.time`,
# `retention.size` or `wal-compression` keys in this file; Prometheus rejects
# unknown configuration fields and would refuse to start. Pass the intended
# settings as command-line flags of the Prometheus process instead:
#   --storage.tsdb.retention.time=30d
#   --storage.tsdb.retention.size=10GB
#   --storage.tsdb.wal-compression
+41
View File
@@ -0,0 +1,41 @@
global:
  # Default cadence for scraping targets and for evaluating rules.
  evaluation_interval: 15s
  scrape_interval: 15s
# Alertmanager configuration
# Alerts are delivered to a single, statically configured Alertmanager.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']
# Rule files are loaded once and then evaluated periodically according to
# the global 'evaluation_interval'.
rule_files:
  - '/etc/prometheus/rules/alerts.yml'
# Scrape configurations: Prometheus itself and the Meldestelle application.
scrape_configs:
# Every time series scraped by this job carries the label `job=<job_name>`.
# No metrics_path or scheme is set, so the defaults apply:
# '/metrics' over 'http'.
- job_name: 'prometheus'
  static_configs:
    - targets:
        - 'localhost:9090'
# Scrape configuration for the Meldestelle application
# Reads /actuator/prometheus every 10s with HTTP basic auth and tags the
# resulting series with application/service labels.
- job_name: 'meldestelle-api'
  scrape_interval: 10s
  metrics_path: /actuator/prometheus
  # NOTE(review): Prometheus does not expand ${VAR:-default} placeholders in
  # its configuration file; presumably an external templating step renders
  # these values before the file is loaded - confirm.
  basic_auth:
    username: ${METRICS_USER:-metrics}
    password: ${METRICS_PASSWORD:-metrics-password-dev}
  static_configs:
    - targets:
        - 'server:8081'
      labels:
        service: 'api-gateway'
        application: 'meldestelle'
# Node exporter for host metrics (if added later)
# - job_name: "node-exporter"
# static_configs:
# - targets: ["node-exporter:9100"]
@@ -0,0 +1,62 @@
groups:
- name: meldestelle_alerts
rules:
# Alert for high memory usage
# Warns when a heap series stays above 85% of its reported maximum for
# 5 minutes.
# NOTE(review): the ratio is evaluated per series, not for the heap as a
# whole - confirm that per-series alerting is what is wanted here.
- alert: HighMemoryUsage
  expr: '(jvm_memory_used_bytes{area="heap"} / jvm_memory_max_bytes{area="heap"}) * 100 > 85'
  for: 5m
  annotations:
    description: "JVM memory usage is above 85% for 5 minutes.\n Instance: {{ $labels.instance }}\n Service: {{ $labels.service }}"
    summary: "High memory usage ({{ $value }}%)"
  labels:
    severity: warning
# Alert for high CPU usage
# Warns when process_cpu_usage stays above 0.85 for 5 minutes.
- alert: HighCpuUsage
  expr: 'process_cpu_usage > 0.85'
  for: 5m
  annotations:
    description: "CPU usage is above 85% for 5 minutes.\n Instance: {{ $labels.instance }}\n Service: {{ $labels.service }}"
    summary: "High CPU usage ({{ $value }})"
  labels:
    severity: warning
# Alert for high error rate
# Percentage of requests answered with a 5xx status over the last 5 minutes.
# Both sides are aggregated by (instance, service) rather than with a bare
# sum(): a bare sum() drops every label, which left the `instance` and
# `service` placeholders in the description empty.
- alert: HighErrorRate
  expr: >-
    sum by (instance, service) (rate(http_server_requests_seconds_count{status=~"5.."}[5m]))
    / sum by (instance, service) (rate(http_server_requests_seconds_count[5m]))
    * 100 > 5
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: "High error rate ({{ $value }}%)"
    description: "Error rate is above 5% for 2 minutes.\n Instance: {{ $labels.instance }}\n Service: {{ $labels.service }}"
# Alert for service unavailability
# Fires when the application's scrape target has been down for 1 minute.
# The job selector must match the `job_name` used in the scrape
# configuration ("meldestelle-api"); the former "meldestelle-server" value
# matched no configured job, so the alert could never fire.
- alert: ServiceUnavailable
  expr: up{job="meldestelle-api"} == 0
  for: 1m
  labels:
    severity: critical
  annotations:
    summary: "Service unavailable"
    description: "Meldestelle service is down.\n Instance: {{ $labels.instance }}"
# Alert for slow response time
# Average request duration over the last 5 minutes, per series.
# rate() is applied to both counters on purpose: dividing the raw _sum by
# the raw _count yields the average since process start, which barely moves
# after a long uptime and hides recent slowdowns.
- alert: SlowResponseTime
  expr: >-
    rate(http_server_requests_seconds_sum[5m])
    / rate(http_server_requests_seconds_count[5m])
    > 1
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "Slow response time ({{ $value }}s)"
    description: "Average response time is above 1 second for 5 minutes.\n Instance: {{ $labels.instance }}\n Path: {{ $labels.uri }}"
# Alert for high GC pause time
# Average GC pause duration over the last 5 minutes, per series.
# rate() is applied to both counters on purpose: dividing the raw _sum by
# the raw _count yields the average since process start rather than the
# recent pause time the alert is meant to watch.
- alert: HighGcPauseTime
  expr: >-
    rate(jvm_gc_pause_seconds_sum[5m])
    / rate(jvm_gc_pause_seconds_count[5m])
    > 0.5
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "High GC pause time ({{ $value }}s)"
    description: "Average GC pause time is above 0.5 seconds for 5 minutes.\n Instance: {{ $labels.instance }}"