...
 
Commits (3)
groups:
- name: service_availability.rules
rules:
# Availability below 2 sigma
- alert: service_availability_out_of_bounds_lower_2sigma_5m
# Warn: Availability below 3 sigma
- alert: service_availability_out_of_bounds_lower_5m
expr: |
gitlab_service_availability:ratio
<
gitlab_service_availability:ratio:avg_over_time_1w - 2.5 * gitlab_service_availability:ratio:stddev_over_time_1w
gitlab_service_availability:ratio:avg_over_time_1w - 3 * gitlab_service_availability:ratio:stddev_over_time_1w
for: 5m
labels:
rules_domain: general
......@@ -14,7 +14,7 @@ groups:
severity: warn
period: 5m
bound: lower
threshold_sigma: "2.5"
threshold_sigma: "3"
annotations:
description: |
The ratio of services that are available to serve the `{{ $labels.type }}` service
......@@ -31,8 +31,8 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md"
# Availability below 4 sigma
- alert: service_availability_out_of_bounds_lower_4sigma_5m
# Error: Availability below 4 sigma
- alert: service_availability_out_of_bounds_lower_5m
expr: |
gitlab_service_availability:ratio
<
......@@ -61,12 +61,12 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md"
# Operation rate above 2 sigma
- alert: service_ops_out_of_bounds_upper_2sigma_5m
# Warn: Operation rate above 3 sigma
- alert: service_ops_out_of_bounds_upper_5m
expr: |
gitlab_service_ops:rate
>
gitlab_service_ops:rate:avg_over_time_1w + 2.5 * gitlab_service_ops:rate:stddev_over_time_1w
gitlab_service_ops:rate:avg_over_time_1w + 3 * gitlab_service_ops:rate:stddev_over_time_1w
for: 5m
labels:
rules_domain: general
......@@ -74,7 +74,7 @@ groups:
severity: warn
period: 5m
bound: upper
threshold_sigma: "2.5"
threshold_sigma: "3"
annotations:
description: |
The `{{ $labels.type }}` service is receiving more requests than normal.
......@@ -92,8 +92,8 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Operation rate above 4 sigma
- alert: service_ops_out_of_bounds_upper_4sigma_5m
# Error: Operation rate above 4 sigma
- alert: service_ops_out_of_bounds_upper_5m
expr: |
gitlab_service_ops:rate
>
......@@ -123,12 +123,12 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Operation rate below 2 sigma
- alert: service_ops_out_of_bounds_lower_2sigma_5m
# Warn: Operation rate below 3 sigma
- alert: service_ops_out_of_bounds_lower_5m
expr: |
gitlab_service_ops:rate
<
gitlab_service_ops:rate:avg_over_time_1w - 2.5 * gitlab_service_ops:rate:stddev_over_time_1w
gitlab_service_ops:rate:avg_over_time_1w - 3 * gitlab_service_ops:rate:stddev_over_time_1w
for: 5m
labels:
rules_domain: general
......@@ -136,7 +136,7 @@ groups:
severity: warn
period: 5m
bound: lower
threshold_sigma: "2.5"
threshold_sigma: "3"
annotations:
description: |
The `{{ $labels.type }}` service is receiving fewer requests than normal.
......@@ -153,8 +153,8 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Operation rate below 4 sigma
- alert: service_ops_out_of_bounds_lower_4sigma_5m
# Error: Operation rate below 4 sigma
- alert: service_ops_out_of_bounds_lower_5m
expr: |
gitlab_service_ops:rate
<
......@@ -183,12 +183,12 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Apdex lower than 2 sigma
- alert: service_apdex_out_of_bounds_lower_2sigma_5m
# Warn: Apdex lower than 3 sigma
- alert: service_apdex_out_of_bounds_lower_5m
expr: |
gitlab_service_apdex:ratio
<
gitlab_service_apdex:ratio:avg_over_time_1w - 2.5 * gitlab_service_apdex:ratio:stddev_over_time_1w
gitlab_service_apdex:ratio:avg_over_time_1w - 3 * gitlab_service_apdex:ratio:stddev_over_time_1w
for: 5m
labels:
rules_domain: general
......@@ -196,7 +196,7 @@ groups:
severity: warn
period: 5m
bound: lower
threshold_sigma: "2.5"
threshold_sigma: "3"
annotations:
description: |
The `{{ $labels.type }}` service is operating at a slower rate than normal.
......@@ -214,8 +214,8 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md"
# Apdex lower than 4 sigma
- alert: service_apdex_out_of_bounds_lower_4sigma_5m
# Error: Apdex lower than 4 sigma
- alert: service_apdex_out_of_bounds_lower_5m
expr: |
gitlab_service_apdex:ratio
<
......@@ -245,12 +245,12 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md"
# Error rate exceeds 2 sigma
- alert: service_errors_out_of_bounds_upper_2sigma_5m
# Warn: Error rate exceeds 3 sigma
- alert: service_errors_out_of_bounds_upper_5m
expr: |
gitlab_service_errors:rate
>
gitlab_service_errors:rate:avg_over_time_1w + 2.5 * gitlab_service_errors:rate:stddev_over_time_1w
gitlab_service_errors:rate:avg_over_time_1w + 3 * gitlab_service_errors:rate:stddev_over_time_1w
for: 5m
labels:
rules_domain: general
......@@ -258,7 +258,7 @@ groups:
severity: warn
period: 5m
bound: upper
threshold_sigma: "2.5"
threshold_sigma: "3"
annotations:
description: |
The `{{ $labels.type }}` service is generating more errors than normal.
......@@ -275,8 +275,8 @@ groups:
link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-errors.md"
# Error rate exceeds 4 sigma
- alert: service_errors_out_of_bounds_upper_4sigma_5m
# Error: Error rate exceeds 4 sigma
- alert: service_errors_out_of_bounds_upper_5m
expr: |
gitlab_service_errors:rate
>
......