Commit 6483e63a authored by Andrew Newdigate

Move from 2.5sigma to 3sigma for warning alerts

parent 6e91fc0d
groups: groups:
- name: service_availability.rules - name: service_availability.rules
rules: rules:
# Availability below 2 sigma # Availability below 3 sigma
- alert: service_availability_out_of_bounds_lower_2sigma_5m - alert: service_availability_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_availability:ratio gitlab_service_availability:ratio
< <
gitlab_service_availability:ratio:avg_over_time_1w - 2.5 * gitlab_service_availability:ratio:stddev_over_time_1w gitlab_service_availability:ratio:avg_over_time_1w - 3 * gitlab_service_availability:ratio:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -14,7 +14,7 @@ groups: ...@@ -14,7 +14,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: lower bound: lower
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The ratio of services that are available to serve the `{{ $labels.type }}` service The ratio of services that are available to serve the `{{ $labels.type }}` service
...@@ -31,12 +31,12 @@ groups: ...@@ -31,12 +31,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md"
# Operation rate above 2 sigma # Operation rate above 3 sigma
- alert: service_ops_out_of_bounds_upper_2sigma_5m - alert: service_ops_out_of_bounds_upper_5m
expr: | expr: |
gitlab_service_ops:rate gitlab_service_ops:rate
> >
gitlab_service_ops:rate:avg_over_time_1w + 2.5 * gitlab_service_ops:rate:stddev_over_time_1w gitlab_service_ops:rate:avg_over_time_1w + 3 * gitlab_service_ops:rate:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -44,7 +44,7 @@ groups: ...@@ -44,7 +44,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: upper bound: upper
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is receiving more requests than normal. The `{{ $labels.type }}` service is receiving more requests than normal.
...@@ -62,12 +62,12 @@ groups: ...@@ -62,12 +62,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Operation rate below 2 sigma # Operation rate below 3 sigma
- alert: service_ops_out_of_bounds_lower_2sigma_5m - alert: service_ops_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_ops:rate gitlab_service_ops:rate
< <
gitlab_service_ops:rate:avg_over_time_1w - 2.5 * gitlab_service_ops:rate:stddev_over_time_1w gitlab_service_ops:rate:avg_over_time_1w - 3 * gitlab_service_ops:rate:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -75,7 +75,7 @@ groups: ...@@ -75,7 +75,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: lower bound: lower
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is receiving fewer requests than normal. The `{{ $labels.type }}` service is receiving fewer requests than normal.
...@@ -92,12 +92,12 @@ groups: ...@@ -92,12 +92,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Apdex lower than 2 sigma # Apdex lower than 3 sigma
- alert: service_apdex_out_of_bounds_lower_2sigma_5m - alert: service_apdex_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_apdex:ratio gitlab_service_apdex:ratio
< <
gitlab_service_apdex:ratio:avg_over_time_1w - 2.5 * gitlab_service_apdex:ratio:stddev_over_time_1w gitlab_service_apdex:ratio:avg_over_time_1w - 3 * gitlab_service_apdex:ratio:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -105,7 +105,7 @@ groups: ...@@ -105,7 +105,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: lower bound: lower
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is operating at a slower rate than normal. The `{{ $labels.type }}` service is operating at a slower rate than normal.
...@@ -123,12 +123,12 @@ groups: ...@@ -123,12 +123,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md"
# Error rate exceeds 2 sigma # Error rate exceeds 3 sigma
- alert: service_errors_out_of_bounds_upper_2sigma_5m - alert: service_errors_out_of_bounds_upper_5m
expr: | expr: |
gitlab_service_errors:rate gitlab_service_errors:rate
> >
gitlab_service_errors:rate:avg_over_time_1w + 2.5 * gitlab_service_errors:rate:stddev_over_time_1w gitlab_service_errors:rate:avg_over_time_1w + 3 * gitlab_service_errors:rate:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -136,7 +136,7 @@ groups: ...@@ -136,7 +136,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: upper bound: upper
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is generating more errors than normal. The `{{ $labels.type }}` service is generating more errors than normal.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment