Commit a0bda0f4 authored by Andrew Newdigate's avatar Andrew Newdigate

Manage pingdom checks as code

parent fbc3a73e
......@@ -26,3 +26,12 @@ deploy_elastic_watcher_updates:
- master
variables:
- $ES_URL
deploy_pingdom_checks:
image: golang:1.11
script:
- cd pingdom
- go run pingdom.go
only:
refs:
- master
# Pingdom
## How to configure checks in Pingdom
* Pingdom checks are configured in the `pingdom/pingdom.yml` file
* When changes to this file are merged to `master`, the `deploy_pingdom_checks` GitLab CI job will execute.
* This job will perform 3 tasks:
* It will insert any new checks that have been added to the file. These checks will be prefixed with `check:`
* It will remove any checks in Pingdom with the prefix `check:` that are not in this file
* It will update every existing `check:`-prefixed check in Pingdom that is still listed in `pingdom/pingdom.yml`
### `pingdom.yml` Details
The `pingdom.yml` is structured as follows:
```yaml
unique_tag: "pingdom-automated" # This is a tag which only checks in this file should include
defaults:
timeout_ms: 2000 # The default timeout in milliseconds
resolution_minutes: 5 # The default interval, in minutes, at which each check is run
integrations: # This maps from the names we use in this document to the IDs Pingdom needs
- name: pagerduty
id: 65172 # This can be found in the URL when editing an integration
checks:
# Each check has the following structure
- url: https://gitlab.com/gitlab-org/gitlab-ce/
timeout_ms: 5000 # The timeout for this check
notify_when_restored: true # Send an alert when service is restored
tags: # Any additional tags to add to the check
- gitaly
- database
teams: # Teams to associate the check with. See Pingdom for a list of teams
- Infrastructure
integrations:
- pagerduty
```
module gitlab.com/gitlab-com/runbooks/pingdom
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russellcardullo/go-pingdom v0.0.0-20181021024747-0897d314d9a6
github.com/stretchr/testify v1.2.2 // indirect
gopkg.in/yaml.v2 v2.2.1
)
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russellcardullo/go-pingdom v0.0.0-20181021024747-0897d314d9a6 h1:O0IZh+5KFeVGnvCV7Fv0zhoX0Tv3K1OdRqSkByJRcbM=
github.com/russellcardullo/go-pingdom v0.0.0-20181021024747-0897d314d9a6/go.mod h1:AB77QAxQWZGYMKErGN6wnG3ycfMQI+M3zU8qYM/1ZY0=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
package main
import (
"flag"
"fmt"
"io/ioutil"
"net/url"
"os"
"strconv"
"strings"
pingdom "github.com/russellcardullo/go-pingdom/pingdom"
yaml "gopkg.in/yaml.v2"
)
// PingdomCheckDefaults holds the values from the `defaults` section of the
// YAML configuration. They are applied to any check that leaves the
// corresponding setting unset (zero).
type PingdomCheckDefaults struct {
	// TimeoutMS is the fallback for a check's `timeout_ms`, in milliseconds.
	TimeoutMS int `yaml:"timeout_ms"`

	// ResolutionMinutes is the fallback for a check's `resolution_minutes`.
	ResolutionMinutes int `yaml:"resolution_minutes"`
}
// PingdomCheck describes one entry of the `checks` list in the YAML
// configuration, i.e. a single HTTP(S) check to be managed in Pingdom.
type PingdomCheck struct {
	// URL is the full URL to probe. It has no explicit tag, so it is read
	// from the YAML key the decoder derives from the field name.
	URL string

	// TimeoutMS overrides the default timeout (milliseconds); zero means unset.
	TimeoutMS int `yaml:"timeout_ms"`

	// ResolutionMinutes overrides the default resolution; zero means unset.
	ResolutionMinutes int `yaml:"resolution_minutes"`

	// Teams lists the names of the Pingdom teams to associate with the check.
	Teams []string `yaml:"teams"`

	// Tags lists additional tags to attach to the check.
	Tags []string `yaml:"tags"`

	// Integrations lists integration names, as declared in the top-level
	// `integrations` section of the configuration.
	Integrations []string `yaml:"integrations"`

	// NotifyWhenRestored requests a notification when the service is back up.
	NotifyWhenRestored bool `yaml:"notify_when_restored"`
}
// PingdomChecks is the root of the YAML configuration file.
type PingdomChecks struct {
	// UniqueTag is a tag that is added to every check generated from the file.
	UniqueTag string `yaml:"unique_tag"`

	// Defaults supplies fallback settings for checks that omit them.
	Defaults PingdomCheckDefaults

	// Integrations maps the integration names used by individual checks to
	// the numeric IDs passed to Pingdom.
	Integrations []struct {
		Name string `yaml:"name"`
		ID   int    `yaml:"id"`
	}

	// Checks is the list of checks that should exist in Pingdom.
	Checks []PingdomCheck
}
// name returns the name this check carries in Pingdom: the check's URL
// prefixed with "check:".
func (c PingdomCheck) name() string {
	checkName := fmt.Sprintf("check:%v", c.URL)
	return checkName
}
// hostname returns the host part of the check's URL, without any port.
// It panics if the URL cannot be parsed.
func (c PingdomCheck) hostname() string {
	parsed, parseErr := url.Parse(c.URL)
	if parseErr == nil {
		return parsed.Hostname()
	}
	panic(parseErr)
}
// encryption reports whether the check's URL uses the "https" scheme.
// It panics if the URL cannot be parsed.
func (c PingdomCheck) encryption() bool {
	parsed, parseErr := url.Parse(c.URL)
	if parseErr == nil {
		return parsed.Scheme == "https"
	}
	panic(parseErr)
}
// path returns the part of the check's URL that follows the host: the path,
// and, when the URL has a query string, a "?" followed by the raw query.
// It panics if the URL cannot be parsed.
func (c PingdomCheck) path() string {
	u, err := url.Parse(c.URL)
	if err != nil {
		panic(err)
	}
	if u.RawQuery == "" {
		return u.Path
	}
	// RawQuery does not include the leading "?", so it has to be restored;
	// otherwise "/search?q=x" would be sent as "/searchq=x".
	return u.Path + "?" + u.RawQuery
}
// getCheck builds the pingdom.HttpCheck that represents this configured check.
//
// config supplies the defaults and the unique tag, teamMap resolves the team
// names listed on the check, and integrationIDMap resolves its integration
// names to IDs.
//
// The timeout and the resolution are taken from the check itself when set,
// otherwise from config.Defaults, otherwise from the built-in fallbacks
// (5000 and 5 respectively). The tag list always starts with config.UniqueTag;
// empty tags are dropped.
//
// It panics when a team or integration name is unknown, when a team ID is not
// an integer, or when the check's URL cannot be parsed.
func (c PingdomCheck) getCheck(config PingdomChecks, teamMap map[string]pingdom.TeamResponse, integrationIDMap map[string]int) pingdom.Check {
	// Start from the built-in fallback and let the most specific non-zero
	// setting win.
	timeoutMS := 5000
	switch {
	case c.TimeoutMS != 0:
		timeoutMS = c.TimeoutMS
	case config.Defaults.TimeoutMS != 0:
		timeoutMS = config.Defaults.TimeoutMS
	}

	resolutionMinutes := 5
	switch {
	case c.ResolutionMinutes != 0:
		resolutionMinutes = c.ResolutionMinutes
	case config.Defaults.ResolutionMinutes != 0:
		resolutionMinutes = config.Defaults.ResolutionMinutes
	}

	teamIDs := make([]int, 0, len(c.Teams))
	for _, teamName := range c.Teams {
		team, known := teamMap[teamName]
		if !known {
			panic("Unable to find team " + teamName)
		}
		id, convErr := strconv.Atoi(team.ID)
		if convErr != nil {
			panic("TeamID is not an integer: " + team.ID)
		}
		teamIDs = append(teamIDs, id)
	}

	integrationIDs := make([]int, 0, len(c.Integrations))
	for _, integrationName := range c.Integrations {
		id, known := integrationIDMap[integrationName]
		if !known {
			panic("Unable to find integration " + integrationName)
		}
		integrationIDs = append(integrationIDs, id)
	}

	tags := make([]string, 1, len(c.Tags)+1)
	tags[0] = config.UniqueTag
	for _, tag := range c.Tags {
		if tag == "" {
			continue
		}
		tags = append(tags, tag)
	}

	httpCheck := &pingdom.HttpCheck{
		Name:                  c.name(),
		Hostname:              c.hostname(),
		Url:                   c.path(),
		Encryption:            c.encryption(),
		Resolution:            resolutionMinutes,
		ResponseTimeThreshold: timeoutMS,
		Tags:                  strings.Join(tags, ","),
		TeamIds:               teamIDs,
		IntegrationIds:        integrationIDs,
		NotifyWhenBackup:      c.NotifyWhenRestored,
	}
	return httpCheck
}
// findChecksForRemoval returns every deployed check whose key in
// deployedChecks has no counterpart in configMap. The result is nil when
// nothing needs removing; its order is unspecified because it follows map
// iteration.
func findChecksForRemoval(configMap map[string]PingdomCheck, deployedChecks map[string]pingdom.CheckResponse) []pingdom.CheckResponse {
	var stale []pingdom.CheckResponse
	for checkName, deployed := range deployedChecks {
		if _, configured := configMap[checkName]; configured {
			continue
		}
		stale = append(stale, deployed)
	}
	return stale
}
// findChecksForUpdate returns every deployed check whose key in
// deployedChecks is also present in configMap. The result is nil when there
// is no overlap; its order is unspecified because it follows map iteration.
func findChecksForUpdate(configMap map[string]PingdomCheck, deployedChecks map[string]pingdom.CheckResponse) []pingdom.CheckResponse {
	var existing []pingdom.CheckResponse
	for checkName, deployed := range deployedChecks {
		if _, configured := configMap[checkName]; !configured {
			continue
		}
		existing = append(existing, deployed)
	}
	return existing
}
// findChecksForInsertion returns every configured check whose name() is not a
// key of deployedChecks, printing a line to standard output for each one.
// The result is nil when nothing is missing; its order is unspecified because
// it follows map iteration.
func findChecksForInsertion(configMap map[string]PingdomCheck, deployedChecks map[string]pingdom.CheckResponse) []PingdomCheck {
	var missing []PingdomCheck
	for _, configured := range configMap {
		checkName := configured.name()
		if _, deployed := deployedChecks[checkName]; deployed {
			continue
		}
		fmt.Printf("%v has not been deployed: %v\n", checkName, deployedChecks)
		missing = append(missing, configured)
	}
	return missing
}
// main synchronises the checks in Pingdom with the YAML configuration file.
//
// The file is selected with the -config flag (default "pingdom.yml").
// Credentials are read from the PINGDOM_USERNAME, PINGDOM_PASSWORD,
// PINGDOM_APPKEY and PINGDOM_ACCOUNT_EMAIL environment variables.
//
// Only deployed checks whose name starts with "check:" are considered managed.
// Configured checks that are missing are created, managed checks still in the
// file are updated, and managed checks no longer in the file are deleted, in
// that order. Any failure panics.
func main() {
	configurationFile := flag.String("config", "pingdom.yml", "Configuration File")
	// Without Parse the -config flag is never read and the default always wins.
	flag.Parse()

	yamlFile, err := ioutil.ReadFile(*configurationFile)
	if err != nil {
		panic(err)
	}

	var pingdomChecks PingdomChecks
	if err := yaml.Unmarshal(yamlFile, &pingdomChecks); err != nil {
		// A malformed file must stop the run here: continuing with an empty
		// configuration would mark every managed check for deletion below.
		panic(err)
	}
	fmt.Printf("%+v\n", pingdomChecks)

	// Index the configured checks by the name they carry in Pingdom.
	configMap := make(map[string]PingdomCheck)
	for _, v := range pingdomChecks.Checks {
		configMap[v.name()] = v
	}

	integrationIDMap := make(map[string]int)
	for _, v := range pingdomChecks.Integrations {
		integrationIDMap[v.Name] = v.ID
	}

	client := pingdom.NewMultiUserClient(os.Getenv("PINGDOM_USERNAME"), os.Getenv("PINGDOM_PASSWORD"), os.Getenv("PINGDOM_APPKEY"), os.Getenv("PINGDOM_ACCOUNT_EMAIL"))

	teams, err := client.Teams.List()
	if err != nil {
		panic(err)
	}
	teamMap := make(map[string]pingdom.TeamResponse)
	for _, v := range teams {
		teamMap[v.Name] = v
	}

	checks, err := client.Checks.List()
	if err != nil {
		panic(err)
	}
	// Checks without the "check:" prefix are not managed by this tool and are
	// therefore never updated or deleted.
	deployedChecks := make(map[string]pingdom.CheckResponse)
	for _, v := range checks {
		if strings.HasPrefix(v.Name, "check:") {
			deployedChecks[v.Name] = v
		}
	}

	forRemoval := findChecksForRemoval(configMap, deployedChecks)
	forUpdate := findChecksForUpdate(configMap, deployedChecks)
	forInsertion := findChecksForInsertion(configMap, deployedChecks)

	// Do the inserts
	for _, v := range forInsertion {
		check, err := client.Checks.Create(v.getCheck(pingdomChecks, teamMap, integrationIDMap))
		if err != nil {
			panic(err)
		}
		fmt.Println("Created check:", check) // {ID, Name}
	}

	// Do the updates
	for _, update := range forUpdate {
		v, ok := configMap[update.Name]
		if !ok {
			panic("Unable to lookup " + update.Name)
		}
		check, err := client.Checks.Update(update.ID, v.getCheck(pingdomChecks, teamMap, integrationIDMap))
		if err != nil {
			panic(err)
		}
		fmt.Println("Updated check:", check) // {ID, Name}
	}

	// Do the deletions
	for _, d := range forRemoval {
		check, err := client.Checks.Delete(d.ID)
		if err != nil {
			panic(err)
		}
		fmt.Println("Deleted check:", check) // {ID, Name}
	}
}
# See ../howto/pingdom.md for details of how to use this
unique_tag: "pingdom-automated"
defaults:
timeout_ms: 2000
integrations:
- name: pagerduty
id: 65172
checks:
- url: https://gitlab.com/gitlab-org/gitlab-ce/
timeout_ms: 5000
notify_when_restored: true
tags:
- gitaly
- database
teams:
- Infrastructure
integrations:
- pagerduty
- url: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/
timeout_ms: 5000
notify_when_restored: true
tags:
- gitaly
- database
teams:
- Infrastructure
integrations:
- pagerduty
- url: https://gitlab.com/gitlab-org/gitlab-ce/tree/master
timeout_ms: 5000
notify_when_restored: true
tags:
- gitaly
teams:
- Infrastructure
integrations:
- pagerduty
- url: https://gitlab.com/gitlab-org/gitlab-ce/issues
timeout_ms: 5000
notify_when_restored: true
tags:
- database
teams:
- Infrastructure
integrations:
- pagerduty
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment