Tune replication lag alerts for DR replicas.

parent 68cebc72
......@@ -122,7 +122,7 @@ groups:
runbook: troubleshooting/postgres.md#replication-is-lagging-or-has-stopped
title: Postgres Replication lag is over 12 hours on archive recovery replica
- alert: PostgreSQL_ReplicationLagTooLarge_ArchiveReplica
expr: (pg_replication_lag > 43200) and ON(instance) (pg_replication_is_replica{type = "postgres-archive"} == 1)
expr: (pg_replication_lag > 3600) and ON(instance) (pg_replication_is_replica{type = "postgres-archive"} == 1)
for: 5m
labels:
pager: pagerduty
......@@ -132,9 +132,9 @@ groups:
description: Replication lag on server {{$labels.instance}} is currently {{
$value | humanizeDuration }}
runbook: troubleshooting/postgres.md#replication-is-lagging-or-has-stopped
title: Postgres Replication lag is over 12 hours on archive recovery replica
title: Postgres Replication lag is over 1 hour on archive recovery replica
- alert: PostgreSQL_ReplicationLagTooLarge_DelayedReplica
expr: (pg_replication_lag > 43200) and ON(instance) (pg_replication_is_replica{type = "postgres-delayed"} == 1)
expr: (pg_replication_lag > 32400) and ON(instance) (pg_replication_is_replica{type = "postgres-delayed"} == 1)
for: 5m
labels:
pager: pagerduty
......@@ -144,7 +144,7 @@ groups:
description: Replication lag on server {{$labels.instance}} is currently {{
$value | humanizeDuration }}
runbook: troubleshooting/postgres.md#replication-is-lagging-or-has-stopped
title: Postgres Replication lag is over 12 hours on delayed replica
title: Postgres Replication lag is over 9 hours on delayed replica (normal is 8 hours)
- alert: PostgreSQL_ReplicationLagBytesTooLarge
expr: (pg_xlog_position_bytes and pg_replication_is_replica == 0) - ON(environment)
GROUP_RIGHT(instance) (pg_xlog_position_bytes and pg_replication_is_replica{type = "postgres", fqdn != "postgres-01-db-gprd.c.gitlab-production.internal"}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment