Commit 5357d167, authored by Emanuel Calvo, committed by Matteo Melli
Browse files

Added files and continued developing runbook

parent 72b45095
......@@ -5,6 +5,23 @@ AZURE_HOST_PREFIX="postgres-0"
# Host-name building blocks for the production environment.
AZURE_HOST_SUFFIX='.db.prd.gitlab.com'
GCP_HOST_PREFIX='postgres-0'
GCP_HOST_SUFFIX='-db-gprd.c.gitlab-production.internal'

# Full host names assembled from the prefix/suffix pairs.
# AZURE_HOST_PREFIX must already be set when these lines run.
AZURE_MASTER="${AZURE_HOST_PREFIX}2${AZURE_HOST_SUFFIX}"
GCP_MASTER_CANDIDATE="${GCP_HOST_PREFIX}1${GCP_HOST_SUFFIX}"

# NOTE(review): both lists below spell hosts out literally instead of using
# the prefix/suffix variables, and they appear to include the master /
# master-candidate hosts as well - confirm this is intended.
AZURE_SLAVES=(
  'postgres-02.db.prd.gitlab.com'
  'postgres-03.db.prd.gitlab.com'
  'postgres-04.db.prd.gitlab.com'
  'postgres-01.db.prd.gitlab.com'
)

GCP_SLAVES=(
  'postgres-02-db-gprd.c.gitlab-production.internal'
  'postgres-03-db-gprd.c.gitlab-production.internal'
  'postgres-01-db-gprd.c.gitlab-production.internal'
  'postgres-04-db-gprd.c.gitlab-production.internal'
)

# Generic
max_rep_delay=10
......@@ -4,6 +4,21 @@ AZURE_HOST_PREFIX="postgres0"
# Host-name building blocks for the staging environment.
AZURE_HOST_SUFFIX='.db.stg.gitlab.com'
GCP_HOST_PREFIX='postgres-0'
GCP_HOST_SUFFIX='-db-gstg.c.gitlab-staging-1.internal'

# Full host names assembled from the prefix/suffix pairs.
# AZURE_HOST_PREFIX must already be set when these lines run.
AZURE_MASTER="${AZURE_HOST_PREFIX}2${AZURE_HOST_SUFFIX}"
GCP_MASTER_CANDIDATE="${GCP_HOST_PREFIX}2${GCP_HOST_SUFFIX}"

# NOTE(review): both lists below spell hosts out literally instead of using
# the prefix/suffix variables, and they appear to include the master /
# master-candidate hosts as well - confirm this is intended.
AZURE_SLAVES=(
  'postgres01.db.stg.gitlab.com'
  'postgres02.db.stg.gitlab.com'
)

GCP_SLAVES=(
  'postgres-02-db-gstg.c.gitlab-staging-1.internal'
  'postgres-01-db-gstg.c.gitlab-staging-1.internal'
  'postgres-03-db-gstg.c.gitlab-staging-1.internal'
)

# Generic
max_rep_delay=10
\ No newline at end of file
......@@ -2,7 +2,7 @@
# The environment name (e.g. "staging" or "production") is mandatory: it
# selects which .env_<name> file is loaded below.
if [[ $# -lt 1 ]]; then
  echo "Specify the environment" >&2
  exit 1
fi

# Assigned and exported in a single statement so sourced files and child
# processes see the same value.
export ENVIRONMENT="$1"

# Stop right away if either file cannot be loaded; continuing without the
# host definitions or the helper functions would only fail later.
source ".env_${ENVIRONMENT}" || { # That is, .env_staging or .env_production
  echo "Cannot load .env_${ENVIRONMENT}" >&2
  exit 1
}
source utilities || {
  echo "Cannot load utilities" >&2
  exit 1
}
......
#!/bin/bash
# Prints the repmgr cluster status by running
# "gitlab-ctl repmgr cluster show" over ssh on the host in AZURE_MASTER.
#
# Usage: <script> <environment>
#   <environment> selects the env_<environment> file sourced from the
#   current directory; that file is expected to define AZURE_MASTER.

if [[ $# -lt 1 ]]; then
  echo "Specify the environment" >&2
  exit 1
fi

# Aliases are not expanded in non-interactive shells unless enabled.
shopt -s expand_aliases
alias ssh_remote="ssh "

source "env_${1}" || {
  echo "Cannot load env_${1}" >&2
  exit 1
}

# The remote command is passed as one quoted word so the local shell neither
# splits nor glob-expands it before ssh sends it.
ssh_remote "${AZURE_MASTER}" \
  'cd /tmp; sudo -u gitlab-psql gitlab-ctl repmgr cluster show'
\ No newline at end of file
#!/bin/bash
# Prints the repmgr cluster status by running
# "gitlab-ctl repmgr cluster show" over ssh on the host in AZURE_MASTER.
#
# Usage: <script> <environment>
#   <environment> selects the env_<environment> file sourced from the
#   current directory; that file is expected to define AZURE_MASTER.

if [[ $# -lt 1 ]]; then
  echo "Specify the environment" >&2
  exit 1
fi

# Aliases are not expanded in non-interactive shells unless enabled.
shopt -s expand_aliases
alias ssh_remote="ssh "

source "env_${1}" || {
  echo "Cannot load env_${1}" >&2
  exit 1
}

# The remote command is passed as one quoted word so the local shell neither
# splits nor glob-expands it before ssh sends it.
ssh_remote "${AZURE_MASTER}" \
  'cd /tmp; sudo -u gitlab-psql gitlab-ctl repmgr cluster show'
# Non-interactive shells ignore aliases unless expand_aliases is on; it must
# be enabled before the functions that use ssh_remote are defined.
shopt -s expand_aliases
alias ssh_remote='ssh '

# Ordered list of step names; a function with the same name is defined for
# each entry further down in this file.
steps=(
  000_create-tombstone-table
  001_check-gcp-replication-delay
  002_disable-chef
  003_disable-consul
  004_disable-automatic-failover
  005_forbid-writes-to-current-master
  006_convert-azure-master-to-standby
  007_check-gcp-nodes-has-same-azure-lsn
  008_perform-gcp-candidate-master-promote
  009_check-gcp-candidate-master-is-master
  010_reduce-statement-timeout
  011_configure-pgbouncer-for-gcp
  012_ensure-priority-is-updated-in-repmgr
  013_update-chef-cookbook
  014_enable-automatic-failover-on-gcp-only
  015_enable-consul-on-gcp-only
  016_enable-chef-on-gcp-only
)
export steps
#######################################
# (Re)create the "tombstone" database and its "tombstone" table on a host.
# Any existing tombstone database is dropped first.
# Arguments:
#   $1 - host to run the commands on (via ssh_remote)
# Returns:
#   1 if no host was given, otherwise the exit status of ssh_remote
#######################################
create_tombstone() {
  if [[ -z "${1:-}" ]]; then
    echo "create_tombstone: target host is required" >&2
    return 1
  fi

  local remote_cmds
  # DROP DATABASE and CREATE DATABASE are issued through separate psql
  # invocations: psql runs a multi-statement -c string as a single
  # transaction, and neither statement is allowed inside one.
  remote_cmds=$(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
  -c "drop database if exists tombstone";
sudo -u gitlab-psql gitlab-psql postgres \
  -c "create database tombstone";
sudo -u gitlab-psql gitlab-psql tombstone \
  -c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"
EOF
)
  # Quoted so the local shell does not word-split or glob the command text.
  ssh_remote "${1}" "${remote_cmds}"
}
#######################################
# Wait until a freshly written tombstone row becomes visible on the GCP
# master candidate.
# Re-creates the tombstone database/table on the Azure master, inserts a
# uniquely tagged row there, then polls the GCP master candidate every
# 3 seconds until that row can be read, printing the replay delay reported
# by the candidate on each unsuccessful poll.
# Globals:
#   AZURE_MASTER          (read)
#   GCP_MASTER_CANDIDATE  (read)
#   GITLAB_ENV            (read; falls back to ENVIRONMENT when unset/empty)
#   max_rep_delay         (read; set to 10 only when unset/empty)
#   tombstone_msg         (written)
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout; a warning on stderr whenever the reported
#   delay exceeds max_rep_delay
# Returns:
#   0 once the row is visible on the candidate (polls until then),
#   1 if the tombstone could not be prepared or written on the Azure master
#######################################
handle_gcp_replication_delay() { # put definition to the top
  local find_new_msg gcp_cur_rep_delay recreate_cmds

  # Keep a threshold that is already set (for example by the sourced
  # environment file) instead of overwriting it.
  max_rep_delay=${max_rep_delay:-10} # TODO(NikolayS) is 10s ok? Double-check after turning SR on

  # "create database if not exists" is not supported in Postgres,
  # so to make the following action idempotent and not depending on the pre-actions,
  # we better re-create tombstone DB and table from scratch
  recreate_cmds=$(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
  -c "drop database if exists tombstone";
sudo -u gitlab-psql gitlab-psql postgres \
  -c "create database tombstone";
sudo -u gitlab-psql gitlab-psql tombstone \
  -c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"
EOF
)
  if ! ssh_remote "$AZURE_MASTER" "$recreate_cmds"; then
    echo "Could not re-create the tombstone database on $AZURE_MASTER" >&2
    return 1
  fi

  tombstone_msg="$(date +'%Y%m%d_%H%M%S')_${GITLAB_ENV:-${ENVIRONMENT:-}}"

  # Without this row the polling loop below could never finish, so a failed
  # insert aborts instead of waiting forever.
  if ! ssh_remote "$AZURE_MASTER" \
    "cd /tmp; sudo -u gitlab-psql gitlab-psql tombstone -c \"insert into tombstone(note) values('${tombstone_msg}') returning *\""; then
    echo "Could not write the tombstone message on $AZURE_MASTER" >&2
    return 1
  fi

  # wait until the change is propagated
  while true; do
    find_new_msg=$(
      ssh_remote "$GCP_MASTER_CANDIDATE" \
        "cd /tmp; sudo gitlab-psql -Atd tombstone -c \"select created_at from tombstone where note = '$tombstone_msg'\""
    )
    if [[ -n "$find_new_msg" ]]; then
      echo "New tombstone message arrived to $GCP_MASTER_CANDIDATE, continue."
      break
    fi

    gcp_cur_rep_delay=$(
      ssh_remote "$GCP_MASTER_CANDIDATE" \
        "cd /tmp; sudo gitlab-psql -Atd postgres -c 'select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))'"
    )
    echo "New tombstone message is not seen on $GCP_MASTER_CANDIDATE. The replication delay: ${gcp_cur_rep_delay}s. Wait 3 seconds..."

    # Compare only when both values are plain integers; the query result may
    # be empty when the delay cannot be determined.
    if [[ "$gcp_cur_rep_delay" =~ ^[0-9]+$ && "$max_rep_delay" =~ ^[0-9]+$ ]] \
      && (( gcp_cur_rep_delay > max_rep_delay )); then
      echo "WARNING: replication delay on $GCP_MASTER_CANDIDATE is ${gcp_cur_rep_delay}s, above max_rep_delay=${max_rep_delay}s" >&2
    fi
    sleep 3
  done
}
# Step 000: run create_tombstone against the host in AZURE_MASTER.
# Globals:
#   AZURE_MASTER (read)
# Returns:
#   The exit status of create_tombstone, so a failed step is not reported
#   as successful.
function 000_create-tombstone-table(){
  create_tombstone "${AZURE_MASTER}"
}
# Step 001 placeholder: performs no action yet and always succeeds.
function 001_check-gcp-replication-delay(){
  :
}
# Step 002 placeholder: performs no action yet and always succeeds.
function 002_disable-chef(){
  :
}
# Step 003 placeholder: performs no action yet and always succeeds.
function 003_disable-consul(){
  :
}
# Step 004 placeholder: performs no action yet and always succeeds.
function 004_disable-automatic-failover(){
  :
}
# Step 005 placeholder: performs no action yet and always succeeds.
function 005_forbid-writes-to-current-master(){
  :
}
# Step 006 placeholder: performs no action yet and always succeeds.
function 006_convert-azure-master-to-standby(){
  :
}
# Step 007 placeholder: performs no action yet and always succeeds.
function 007_check-gcp-nodes-has-same-azure-lsn(){
  :
}
# Step 008 placeholder: performs no action yet and always succeeds.
function 008_perform-gcp-candidate-master-promote(){
  :
}
# Step 009 placeholder: performs no action yet and always succeeds.
function 009_check-gcp-candidate-master-is-master(){
  :
}
# Step 010 placeholder: performs no action yet and always succeeds.
function 010_reduce-statement-timeout(){
  :
}
# Step 011 placeholder: performs no action yet and always succeeds.
function 011_configure-pgbouncer-for-gcp(){
  :
}
# Step 012 placeholder: performs no action yet and always succeeds.
function 012_ensure-priority-is-updated-in-repmgr(){
  :
}
# Step 013 placeholder: performs no action yet and always succeeds.
function 013_update-chef-cookbook(){
  :
}
# Step 014 placeholder: performs no action yet and always succeeds.
function 014_enable-automatic-failover-on-gcp-only(){
  :
}
# Step 015 placeholder: performs no action yet and always succeeds.
function 015_enable-consul-on-gcp-only(){
  :
}
# Step 016 placeholder: performs no action yet and always succeeds.
function 016_enable-chef-on-gcp-only(){
  :
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment