...
 
Commits (3)
groups: groups:
- name: service_availability.rules - name: service_availability.rules
rules: rules:
# Availability below 2 sigma # Warn: Availability below 3 sigma
- alert: service_availability_out_of_bounds_lower_2sigma_5m - alert: service_availability_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_availability:ratio gitlab_service_availability:ratio
< <
gitlab_service_availability:ratio:avg_over_time_1w - 2.5 * gitlab_service_availability:ratio:stddev_over_time_1w gitlab_service_availability:ratio:avg_over_time_1w - 3 * gitlab_service_availability:ratio:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -14,7 +14,7 @@ groups: ...@@ -14,7 +14,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: lower bound: lower
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The ratio of services that are available to serve the `{{ $labels.type }}` service The ratio of services that are available to serve the `{{ $labels.type }}` service
...@@ -31,8 +31,8 @@ groups: ...@@ -31,8 +31,8 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md"
# Availability below 4 sigma # Error: Availability below 4 sigma
- alert: service_availability_out_of_bounds_lower_4sigma_5m - alert: service_availability_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_availability:ratio gitlab_service_availability:ratio
< <
...@@ -61,12 +61,12 @@ groups: ...@@ -61,12 +61,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-availability.md"
# Operation rate above 2 sigma # Warn: Operation rate above 3 sigma
- alert: service_ops_out_of_bounds_upper_2sigma_5m - alert: service_ops_out_of_bounds_upper_5m
expr: | expr: |
gitlab_service_ops:rate gitlab_service_ops:rate
> >
gitlab_service_ops:rate:avg_over_time_1w + 2.5 * gitlab_service_ops:rate:stddev_over_time_1w gitlab_service_ops:rate:avg_over_time_1w + 3 * gitlab_service_ops:rate:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -74,7 +74,7 @@ groups: ...@@ -74,7 +74,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: upper bound: upper
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is receiving more requests than normal. The `{{ $labels.type }}` service is receiving more requests than normal.
...@@ -92,8 +92,8 @@ groups: ...@@ -92,8 +92,8 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Operation rate above 4 sigma # Error: Operation rate above 4 sigma
- alert: service_ops_out_of_bounds_upper_4sigma_5m - alert: service_ops_out_of_bounds_upper_5m
expr: | expr: |
gitlab_service_ops:rate gitlab_service_ops:rate
> >
...@@ -123,12 +123,12 @@ groups: ...@@ -123,12 +123,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Operation rate below 2 sigma # Warn: Operation rate below 3 sigma
- alert: service_ops_out_of_bounds_lower_2sigma_5m - alert: service_ops_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_ops:rate gitlab_service_ops:rate
< <
gitlab_service_ops:rate:avg_over_time_1w - 2.5 * gitlab_service_ops:rate:stddev_over_time_1w gitlab_service_ops:rate:avg_over_time_1w - 3 * gitlab_service_ops:rate:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -136,7 +136,7 @@ groups: ...@@ -136,7 +136,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: lower bound: lower
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is receiving fewer requests than normal. The `{{ $labels.type }}` service is receiving fewer requests than normal.
...@@ -153,8 +153,8 @@ groups: ...@@ -153,8 +153,8 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Operation rate below 4 sigma # Error: Operation rate below 4 sigma
- alert: service_ops_out_of_bounds_lower_4sigma_5m - alert: service_ops_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_ops:rate gitlab_service_ops:rate
< <
...@@ -183,12 +183,12 @@ groups: ...@@ -183,12 +183,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-ops-rate.md"
# Apdex lower than 2 sigma # Warn: Apdex lower than 3 sigma
- alert: service_apdex_out_of_bounds_lower_2sigma_5m - alert: service_apdex_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_apdex:ratio gitlab_service_apdex:ratio
< <
gitlab_service_apdex:ratio:avg_over_time_1w - 2.5 * gitlab_service_apdex:ratio:stddev_over_time_1w gitlab_service_apdex:ratio:avg_over_time_1w - 3 * gitlab_service_apdex:ratio:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -196,7 +196,7 @@ groups: ...@@ -196,7 +196,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: lower bound: lower
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is operating at a slower rate than normal. The `{{ $labels.type }}` service is operating at a slower rate than normal.
...@@ -214,8 +214,8 @@ groups: ...@@ -214,8 +214,8 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md"
# Apdex lower than 4 sigma # Error: Apdex lower than 4 sigma
- alert: service_apdex_out_of_bounds_lower_4sigma_5m - alert: service_apdex_out_of_bounds_lower_5m
expr: | expr: |
gitlab_service_apdex:ratio gitlab_service_apdex:ratio
< <
...@@ -245,12 +245,12 @@ groups: ...@@ -245,12 +245,12 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-apdex.md"
# Error rate exceeds 2 sigma # Warn: Error rate exceeds 3 sigma
- alert: service_errors_out_of_bounds_upper_2sigma_5m - alert: service_errors_out_of_bounds_upper_5m
expr: | expr: |
gitlab_service_errors:rate gitlab_service_errors:rate
> >
gitlab_service_errors:rate:avg_over_time_1w + 2.5 * gitlab_service_errors:rate:stddev_over_time_1w gitlab_service_errors:rate:avg_over_time_1w + 3 * gitlab_service_errors:rate:stddev_over_time_1w
for: 5m for: 5m
labels: labels:
rules_domain: general rules_domain: general
...@@ -258,7 +258,7 @@ groups: ...@@ -258,7 +258,7 @@ groups:
severity: warn severity: warn
period: 5m period: 5m
bound: upper bound: upper
threshold_sigma: "2.5" threshold_sigma: "3"
annotations: annotations:
description: | description: |
The `{{ $labels.type }}` service is generating more errors than normal. The `{{ $labels.type }}` service is generating more errors than normal.
...@@ -275,8 +275,8 @@ groups: ...@@ -275,8 +275,8 @@ groups:
link1_title: "Definition" link1_title: "Definition"
link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-errors.md" link1_url: "https://gitlab.com/gitlab-com/runbooks/blob/master/troubleshooting/definition-service-errors.md"
# Error rate exceeds 4 sigma # Error: Error rate exceeds 4 sigma
- alert: service_errors_out_of_bounds_upper_4sigma_5m - alert: service_errors_out_of_bounds_upper_5m
expr: | expr: |
gitlab_service_errors:rate gitlab_service_errors:rate
> >
......