#!/bin/bash
#
# 111-check-replication-topology.sh
#
# Verifies the PostgreSQL replication topology across the Azure and GCP hosts:
# every host must report the expected pg_is_in_recovery() value, and the
# upstream hosts must list the expected replicas in pg_stat_replication.
# The script's exit status reflects the all_ok flag (0 only if nothing failed).
#
# Required environment:
#   BASE         directory containing the env_<ENVIRONMENT> files
#   ENVIRONMENT  name of the environment file suffix to load
#
# NOTE(review): ssh_remote and the AZURE_*/GCP_* variables are not defined in
# this file; presumably they come from the sourced environment file — confirm.

set -eu

# Fail early with a readable message instead of a bare "unbound variable".
: "${BASE:?BASE must be set}"
: "${ENVIRONMENT:?ENVIRONMENT must be set}"

# shellcheck source=/dev/null
source "${BASE}/env_${ENVIRONMENT}"

# Flipped to false by any failed check; evaluated at the very end of the script.
all_ok=true

# Runs one SQL statement on a host and prints the raw result rows.
# Arguments: $1 - host to query, $2 - SQL statement
# Outputs:   unaligned, tuples-only psql output (one value per line) on stdout
topology_query() {
  local target=$1 sql=$2
  # Carriage returns are stripped so the whole-line matches below stay reliable
  # even if the transport behind ssh_remote happens to emit CRLF line endings.
  printf '%s\n' "$sql" \
    | ssh_remote "$target" sudo -u gitlab-psql gitlab-psql -v ON_ERROR_STOP=1 -d postgres -A -t \
    | tr -d '\r'
}

# Prints the full pg_stat_replication view of a host on stderr (diagnostics).
# Arguments: $1 - host to query
topology_dump_replication() {
  local target=$1
  printf '%s\n' "select * from pg_stat_replication" \
    | ssh_remote "$target" sudo -u gitlab-psql gitlab-psql -v ON_ERROR_STOP=1 -d postgres >&2
}

# Checks that pg_is_in_recovery() on a host returns exactly the expected value.
# Globals:   all_ok (set to false on mismatch)
# Arguments: $1 - host, $2 - expected value (t or f), $3 - message on mismatch
topology_expect_recovery() {
  local target=$1 expected=$2 message=$3
  if ! topology_query "$target" "select pg_is_in_recovery()" \
    | grep -qxF -e "$expected"
  then
    >&2 echo "$message"
    all_ok=false
  fi
}

# Succeeds when the upstream host lists the replica as a streaming client.
# Arguments: $1 - upstream host, $2 - replica host name (resolved via DNS)
topology_is_streaming_to() {
  local upstream=$1 replica=$2 wanted
  # One "<ip>/32-streaming" line per A record, so every address carries the
  # state suffix. Lines that are not address records are ignored.
  wanted=$(host -t A "$replica" | awk '/has address/ { print $4 "/32-streaming" }')
  # An empty pattern would match every row, so an unresolvable name is a failure.
  [ -n "$wanted" ] || return 1
  # -x/-F: whole-line, literal comparison (dots in the IP are not wildcards and
  # 10.0.0.1 cannot match 110.0.0.1).
  topology_query "$upstream" "select client_addr||'-'||state from pg_stat_replication" \
    | grep -qxF -e "$wanted"
}

# Checks the replica count of an upstream host and each expected replica.
# Globals:   all_ok (set to false on any failure)
# Arguments: $1 - upstream host, $2 - expected number of replicas,
#            $3.. - host names that must be streaming from the upstream
topology_expect_replicas() {
  local upstream=$1 expected=$2 replica
  shift 2
  # -x: the count must equal the expected number, not merely contain its digit.
  if ! topology_query "$upstream" "select count(1) from pg_stat_replication" \
    | grep -qxF -e "$expected"
  then
    >&2 echo "Host $upstream is not replicated by $expected nodes:"
    topology_dump_replication "$upstream"
    all_ok=false
  fi
  for replica in "$@"
  do
    if ! topology_is_streaming_to "$upstream" "$replica"
    then
      >&2 echo "Host $upstream is not correclty replicated by host $replica:"
      topology_dump_replication "$upstream"
      all_ok=false
    fi
  done
}

# Azure side: every host except AZURE_MASTER must be in recovery; AZURE_MASTER
# must not be, and must be streaming to 4 replicas (the Azure slaves plus the
# GCP master candidate).
for host in "${AZURE_HOSTS[@]}"
do
  echo "Checking replication for host $host"
  echo
  if [ "$host" != "$AZURE_MASTER" ]
  then
    topology_expect_recovery "$host" t "Host $host is not standby"
  else
    topology_expect_recovery "$host" f "Host $host is not master"
    topology_expect_replicas "$host" 4 "${AZURE_SLAVES[@]}" "$GCP_MASTER_CANDIDATE"
  fi
  echo
done

# GCP side: every host, including GCP_MASTER_CANDIDATE, must be in recovery;
# the candidate must additionally be streaming to 3 replicas (the GCP slaves).
for host in "${GCP_HOSTS[@]}"
do
  echo "Checking replication for host $host"
  echo
  topology_expect_recovery "$host" t "Host $host is not standby"
  if [ "$host" = "$GCP_MASTER_CANDIDATE" ]
  then
    topology_expect_replicas "$host" 3 "${GCP_SLAVES[@]}"
  fi
  echo
done

# Final status of the script: succeeds only if no check cleared the flag.
[ "${all_ok}" = true ]