Commit 5f6cf999 authored by Matteo Melli's avatar Matteo Melli
Browse files

Added replication topology check

parent 246e0b13
Pipeline #88671 failed with stage
in 15 seconds
......@@ -126,6 +126,7 @@
1. [ ] 🐘 {+ Database-Wrangler +}: Ensure repmgr has desired states on all database nodes
* Ensure repmgr has desired states on all database nodes. `/opt/gitlab-migration/bin/scripts/01_preflight/050_configuration_checks/110-check-repmgr-state.sh`
* Check that the replication topology is as expected. `/opt/gitlab-migration/bin/scripts/01_preflight/050_configuration_checks/111-check-topology.sh`
## Ensure Geo replication is up to date
......
# Production
#
# Host inventory for the production PostgreSQL and pgbouncer nodes on the
# Azure and GCP sides.

N_OF_HOSTS=4
AZURE_HOST_PREFIX="postgres-0"
AZURE_HOST_SUFFIX=".db.prd.gitlab.com"
GCP_HOST_PREFIX="postgres-0"
GCP_HOST_SUFFIX="-db-gprd.c.gitlab-production.internal"

# Hosts holding the master role on each side; they are the ones excluded from
# the *_SLAVES lists computed further down.
# NOTE(review): a literal assignment that used to precede these two lines
# named postgres-02-db-gprd.c.gitlab-production.internal as the GCP candidate,
# but it was immediately overridden by the prefix/suffix form below (node 1).
# The effective value is kept here; confirm which node is intended.
AZURE_MASTER="${AZURE_HOST_PREFIX}2${AZURE_HOST_SUFFIX}"
GCP_MASTER_CANDIDATE="${GCP_HOST_PREFIX}1${GCP_HOST_SUFFIX}"

# All database hosts on each side, master included.
AZURE_HOSTS=(
  postgres-02.db.prd.gitlab.com
  postgres-03.db.prd.gitlab.com
  postgres-04.db.prd.gitlab.com
  postgres-01.db.prd.gitlab.com
)

GCP_HOSTS=(
  postgres-02-db-gprd.c.gitlab-production.internal
  postgres-03-db-gprd.c.gitlab-production.internal
  postgres-01-db-gprd.c.gitlab-production.internal
  postgres-04-db-gprd.c.gitlab-production.internal
)

# Every Azure host except AZURE_MASTER. The filter runs in a command
# substitution so the loop variable does not leak into the sourcing shell.
AZURE_SLAVES=(
  $(for host in "${AZURE_HOSTS[@]}"
    do
      if [ "$host" != "$AZURE_MASTER" ]
      then
        echo "$host"
      fi
    done)
)

# Every GCP host except GCP_MASTER_CANDIDATE.
GCP_SLAVES=(
  $(for host in "${GCP_HOSTS[@]}"
    do
      if [ "$host" != "$GCP_MASTER_CANDIDATE" ]
      then
        echo "$host"
      fi
    done)
)

AZURE_PGBOUNCERS=(
  pgbouncer-01.db.prd.gitlab.com
  pgbouncer-02.db.prd.gitlab.com
)

GCP_PGBOUNCERS=(
  pgbouncer-01-db-gprd.c.gitlab-production.internal
  pgbouncer-02-db-gprd.c.gitlab-production.internal
)

# Generic
max_rep_delay=10
# Staging
# Hosts assigned to AZURE_MASTER / GCP_MASTER_CANDIDATE (node 2 on each side),
# written out in full; the prefix/suffix pieces below produce the same names.
AZURE_MASTER='postgres02.db.stg.gitlab.com'
GCP_MASTER_CANDIDATE='postgres-02-db-gstg.c.gitlab-staging-1.internal'

# Host count and the <prefix><node><suffix> building blocks of the hostnames.
N_OF_HOSTS=2
AZURE_HOST_PREFIX='postgres0'
AZURE_HOST_SUFFIX='.db.stg.gitlab.com'
GCP_HOST_PREFIX='postgres-0'
GCP_HOST_SUFFIX='-db-gstg.c.gitlab-staging-1.internal'
AZURE_HOSTS=(
......@@ -17,17 +11,27 @@ postgres02.db.stg.gitlab.com
# All staging database hosts on the GCP side. Each host is listed exactly
# once: a repeated entry would be checked twice and would also appear twice in
# any list derived from this one.
GCP_HOSTS=(
  postgres-02-db-gstg.c.gitlab-staging-1.internal
  postgres-01-db-gstg.c.gitlab-staging-1.internal
  postgres-03-db-gstg.c.gitlab-staging-1.internal
)
# Every Azure host except AZURE_MASTER. The list is derived purely from
# AZURE_HOSTS (not GCP_HOSTS) and carries no hard-coded entries, so it cannot
# drift from the host inventory or contain duplicates of it.
# The filter runs in a command substitution so the loop variable does not leak
# into the sourcing shell.
AZURE_SLAVES=(
  $(for host in "${AZURE_HOSTS[@]}"
    do
      if [ "$host" != "$AZURE_MASTER" ]
      then
        echo "$host"
      fi
    done)
)
# Every GCP host except GCP_MASTER_CANDIDATE. The list is derived purely from
# GCP_HOSTS: hard-coded entries here would repeat derived hosts and could list
# the master candidate as a replica of itself.
# The filter runs in a command substitution so the loop variable does not leak
# into the sourcing shell.
GCP_SLAVES=(
  $(for host in "${GCP_HOSTS[@]}"
    do
      if [ "$host" != "$GCP_MASTER_CANDIDATE" ]
      then
        echo "$host"
      fi
    done)
)
AZURE_PGBOUNCERS=(
......@@ -38,5 +42,3 @@ pgbouncer-01-db-gstg.c.gitlab-staging-1.internal
)
# Generic
# NOTE(review): presumably the maximum tolerated replication delay; neither the
# unit nor the consumer of this value is visible here — confirm.
max_rep_delay=10
\ No newline at end of file
# dbteam nodes
# Hosts assigned to AZURE_MASTER / GCP_MASTER_CANDIDATE, written out in full.
# Nodes 1 are used by Nik for IO testing
AZURE_MASTER='postgres-dbteam-02.db.stg.gitlab.com'
GCP_MASTER_CANDIDATE='postgres-dbteam-02-db-gstg.c.gitlab-staging-1.internal'

# Host count and hostname building blocks.
# NOTE(review): these prefixes do not contain "dbteam", so they do not
# reproduce the host names above — confirm whether they are still used.
N_OF_HOSTS=2
AZURE_HOST_PREFIX='postgres0'
AZURE_HOST_SUFFIX='.db.stg.gitlab.com'
GCP_HOST_PREFIX='postgres-0'
GCP_HOST_SUFFIX='-db-gstg.c.gitlab-staging-1.internal'
AZURE_HOSTS=(
......@@ -18,25 +11,30 @@ postgres-dbteam-05.db.stg.gitlab.com
)
# All dbteam database hosts on the GCP side. Brace expansion emits the node
# numbers in exactly the order written: 01, 05, 04, 02, 03.
GCP_HOSTS=( postgres-dbteam-{01,05,04,02,03}-db-gstg.c.gitlab-staging-1.internal )
# Every Azure host except AZURE_MASTER. The list is derived purely from
# AZURE_HOSTS (not GCP_HOSTS) and carries no hard-coded entries, so it cannot
# drift from the host inventory or contain duplicates of it.
# The filter runs in a command substitution so the loop variable does not leak
# into the sourcing shell.
AZURE_SLAVES=(
  $(for host in "${AZURE_HOSTS[@]}"
    do
      if [ "$host" != "$AZURE_MASTER" ]
      then
        echo "$host"
      fi
    done)
)
# Every GCP host except GCP_MASTER_CANDIDATE. The list is derived purely from
# GCP_HOSTS; listing the same hosts by hand as well would make every replica
# appear twice.
# The filter runs in a command substitution so the loop variable does not leak
# into the sourcing shell.
GCP_SLAVES=(
  $(for host in "${GCP_HOSTS[@]}"
    do
      if [ "$host" != "$GCP_MASTER_CANDIDATE" ]
      then
        echo "$host"
      fi
    done)
)
## Still need to figure out which are the pgbouncers
......@@ -49,5 +47,3 @@ postgres-dbteam-05-db-gstg.c.gitlab-staging-1.internal
)
# Generic
# NOTE(review): presumably the maximum tolerated replication delay; neither the
# unit nor the consumer of this value is visible here — confirm.
max_rep_delay=10
\ No newline at end of file
#!/bin/bash
#
# Checks that the PostgreSQL replication topology is the expected one:
#   * $AZURE_MASTER is not in recovery and is streamed from by every host in
#     AZURE_SLAVES plus $GCP_MASTER_CANDIDATE;
#   * $GCP_MASTER_CANDIDATE is a standby and is itself streamed from by every
#     host in GCP_SLAVES;
#   * every other host in AZURE_HOSTS / GCP_HOSTS is a standby.
#
# ssh_remote and the AZURE_* / GCP_* variables are not defined in this file and
# must be provided by the caller's environment.
#
# Every failed check is reported on stderr; the script keeps going so that all
# problems are listed, then exits non-zero if any check failed.

set -eu

all_ok=true

# Prints the raw (unaligned, tuples-only) result of SQL statement $2 on host $1.
# Carriage returns are stripped so exact-line matching still works should the
# remote session emit CRLF line endings.
remote_sql() {
  ssh_remote "$1" sudo -u gitlab-psql gitlab-psql -A -t -c "$2" | tr -d '\r'
}

# Dumps pg_stat_replication of host $1 to stderr to help diagnose a failure.
dump_replication() {
  >&2 ssh_remote "$1" sudo -u gitlab-psql gitlab-psql -c 'select * from pg_stat_replication'
}

# Succeeds when pg_is_in_recovery() on host $1 returns exactly $2 ('t' or 'f').
recovery_state_is() {
  remote_sql "$1" 'select pg_is_in_recovery()' | grep -qx "$2"
}

# check_replicas_of UPSTREAM [REPLICA...]
# Verifies that UPSTREAM has exactly as many pg_stat_replication rows as there
# are REPLICA arguments, and that each REPLICA (matched by the address the
# `host` lookup returns for it) is in the "streaming" state.
# Sets the global all_ok to false on any mismatch.
check_replicas_of() {
  local upstream=$1
  shift
  local expected=$#
  local replica addr

  # -x: the count must match exactly ('4' must not accept '14' or '40').
  if ! remote_sql "$upstream" 'select count(1) from pg_stat_replication' | grep -qx "$expected"
  then
    >&2 echo "Host $upstream is not replicated by $expected nodes:"
    dump_replication "$upstream"
    all_ok=false
  fi

  for replica in "$@"
  do
    # Fourth space-separated field of the `host` output, as in "<name> has address <ip>".
    addr=$(host "$replica" | cut -d ' ' -f 4)
    # -F/-x: treat the address literally (dots are not wildcards) and match the
    # whole line, so 10.0.0.1 cannot match 110.0.0.1.
    if ! remote_sql "$upstream" "select client_addr||'-'||state from pg_stat_replication" \
      | grep -qxF -- "${addr}-streaming"
    then
      >&2 echo "Host $upstream is not replicated by host $replica:"
      dump_replication "$upstream"
      all_ok=false
    fi
  done
}

for host in "${AZURE_HOSTS[@]}"
do
  echo "Checking replication for host $host"
  echo
  if [ "$host" != "$AZURE_MASTER" ]
  then
    if ! recovery_state_is "$host" t
    then
      >&2 echo "Host $host is not standby"
      all_ok=false
    fi
  else
    if ! recovery_state_is "$host" f
    then
      >&2 echo "Host $host is not master"
      all_ok=false
    fi
    check_replicas_of "$host" "${AZURE_SLAVES[@]}" "$GCP_MASTER_CANDIDATE"
  fi
  echo
done

for host in "${GCP_HOSTS[@]}"
do
  echo "Checking replication for host $host"
  echo
  # Every GCP host must be a standby, the master candidate included: the Azure
  # check above requires the candidate to stream from the Azure master.
  if ! recovery_state_is "$host" t
  then
    >&2 echo "Host $host is not standby"
    all_ok=false
  fi
  if [ "$host" = "$GCP_MASTER_CANDIDATE" ]
  then
    check_replicas_of "$host" "${GCP_SLAVES[@]}"
  fi
  echo
done

# Exit status of the script: 0 only when no check failed.
[ "$all_ok" = true ]
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment