Commit 233a83ad authored by Matteo Melli

Full alpha implementation

parent 8b30abe1
Pipeline #88379 passed with stage
in 32 seconds
......@@ -21,11 +21,11 @@ postgres-03-db-gprd.c.gitlab-production.internal
postgres-01-db-gprd.c.gitlab-production.internal
postgres-04-db-gprd.c.gitlab-production.internal
)
AZURE_PGBOUNCER=(
AZURE_PGBOUNCERS=(
pgbouncer-01.db.prd.gitlab.com
pgbouncer-02.db.prd.gitlab.com
)
GCP_PGBOUNCER=(
GCP_PGBOUNCERS=(
pgbouncer-01-db-gprd.c.gitlab-production.internal
pgbouncer-02-db-gprd.c.gitlab-production.internal
)
......
......@@ -30,10 +30,10 @@ postgres-02-db-gstg.c.gitlab-staging-1.internal
postgres-03-db-gstg.c.gitlab-staging-1.internal
)
AZURE_PGBOUNCER=(
AZURE_PGBOUNCERS=(
pgbouncer-01.db.stg.gitlab.com
)
GCP_PGBOUNCER=(
GCP_PGBOUNCERS=(
pgbouncer-01-db-gstg.c.gitlab-staging-1.internal
)
......
......@@ -7,12 +7,16 @@ export ENVIRONMENT=$1
source .env_${ENVIRONMENT} # That is, .env_staging or .env_production
source utilities
source steps_${ENVIRONMENT}
# Failover steps go here
#
# Smoke-test step: prints a fixed marker line, then returns the status passed
# as the first argument (0 when no argument is supplied).
function 00_test(){
  local status="${1:-0}"
  echo "Running things inside"
  return "$status"
}
# Pre-flight check: every entry in the steps array must resolve (via
# step_script) to something the shell can run. Abort before showing the menu
# if any step has no matching command, reporting the step's 3-digit number.
for step in "${steps[@]}"; do
  if ! type "$(step_script "$step")" > /dev/null 2>&1; then
    # Diagnostics go to stderr so they are not mixed with menu output.
    echo "Function $(step_script "$step") does not exist for step $(step_3digit_number "$step")" >&2
    exit 1
  fi
done
echo "menu"
do_menu
......
......@@ -4,45 +4,101 @@ export steps=(
002_disable-chef
003_disable-consul
004_disable-automatic-failover
005_forbid-writes-to-current-master
006_convert-azure-master-to-standby
007_check-gcp-nodes-has-same-azure-lsn
008_perform-gcp-candidate-master-promote
009_check-gcp-candidate-master-is-master
010_reduce-statement-timeout
011_configure-pgbouncer-for-gcp
012_ensure-priority-is-updated-in-repmgr
013_update-chef-cookbook
014_enable-automatic-failover-on-gcp-only
015_enable-consul-on-gcp-only
016_enable-chef-on-gcp-only
005_convert-azure-master-to-standby
006_check-gcp-nodes-has-same-azure-lsn
007_perform-gcp-candidate-master-promote
008_check-gcp-candidate-master-is-master
009_enable-automatic-failover-on-gcp-only
010_enable-consul-on-gcp-only
011_enable-chef-on-gcp-only
)
function 000_create-tombstone-table(){
return 0
function create-tombstone-table(){
  # (Re)creates the "tombstone" database and its "tombstone" table on the
  # Azure master through ssh_remote.
  #
  # Globals: AZURE_MASTER (read) - host handed to ssh_remote.
  #
  # "create database if not exists" is not supported in Postgres, so to keep
  # this step idempotent and independent of earlier runs the database is
  # dropped and re-created from scratch.
  #
  # DROP DATABASE and CREATE DATABASE are passed as two separate -c options:
  # psql sends each -c string as a single request, which the server executes
  # as one transaction, and neither statement may run inside a transaction
  # block. Repeated -c options need psql 9.6 or newer.
  # NOTE(review): assumes gitlab-psql forwards its arguments to psql - confirm.
  #
  # The command substitution is intentionally left unquoted, so ssh_remote
  # receives the remote command as separate words, as the other steps do.
  ssh_remote "$AZURE_MASTER" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
-c "drop database if exists tombstone" \
-c "create database tombstone";
sudo -u gitlab-psql gitlab-psql tombstone \
-c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"
EOF
)
}
function check-gcp-replication-delay(){
  # Writes a uniquely tagged row into tombstone.tombstone on $AZURE_MASTER and
  # then polls $GCP_MASTER_CANDIDATE until a row with the same note can be read
  # there. Loops forever while the row is missing.
  #
  # Globals read:    ENVIRONMENT, AZURE_MASTER, GCP_MASTER_CANDIDATE
  # Globals written: tombstone_msg, find_new_msg, gcp_cur_rep_delay
  # Outputs:         the insert's output, then progress messages on stdout.
  tombstone_msg="$(date +'%Y%m%d_%H%M%S')_${ENVIRONMENT}"

  ssh_remote "$AZURE_MASTER" \
    "cd /tmp; sudo -u gitlab-psql gitlab-psql tombstone -c \"insert into tombstone(note) values('${tombstone_msg}') returning *\""

  # Poll until the marker row is visible on the candidate.
  while true; do
    find_new_msg=$(
      ssh_remote "$GCP_MASTER_CANDIDATE" \
        "cd /tmp; sudo gitlab-psql -Atd tombstone -c \"select created_at from tombstone where note = '$tombstone_msg'\""
    )

    if [[ -n "$find_new_msg" ]]; then
      echo "New tombstone message arrived to $GCP_MASTER_CANDIDATE."
      break
    fi

    # Not there yet: report how far replay lags (in seconds) and retry.
    gcp_cur_rep_delay=$(
      ssh_remote "$GCP_MASTER_CANDIDATE" \
        "cd /tmp; sudo gitlab-psql -Atd postgres -c 'select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))'"
    )
    echo "New tombstone message is not seen on $GCP_MASTER_CANDIDATE (GCP MASTER CANDIDATE). The replication delay: ${gcp_cur_rep_delay}s. Wait 3 seconds..."
    sleep 3
  done
}
function 001_check-gcp-replication-delay(){
return 0
function disable-chef(){
  # For every host in AZURE_HOSTS followed by GCP_HOSTS: stop the chef-client
  # service, then rename /etc/chef to /etc/chef.migration.
  #
  # Globals: AZURE_HOSTS, GCP_HOSTS (read); host (written, loop variable).
  local -a targets=("${AZURE_HOSTS[@]}" "${GCP_HOSTS[@]}")

  for host in "${targets[@]}"
  do
    echo "stopping chef on $host"
    ssh_remote "$host" sudo service chef-client stop
    ssh_remote "$host" sudo mv /etc/chef /etc/chef.migration
  done
}
function 002_disable-chef(){
return 0
}
function disable-consul(){
for host in "${AZURE_PGBOUNCERS[@]}"
do
ssh "$host" sudo sv stop /opt/gitlab/sv/consul
done
for host in "${GCP_PGBOUNCERS[@]}"
do
ssh "$host" sudo sv stop /opt/gitlab/sv/consul
done
function 003_disable-consul(){
return 0
}
for host in "${AZURE_HOSTS[@]}"
do
ssh "$host" sudo sv stop /opt/gitlab/sv/consul
done
function 004_disable-automatic-failover(){
return 0
for host in "${GCP_HOSTS[@]}"
do
ssh "$host" sudo sv stop /opt/gitlab/sv/consul
done
}
function 005_forbid-writes-to-current-master(){
return 0
function disable-automatic-failover(){
  # Stops the repmgrd runit service everywhere, in this order:
  #   1. every host in GCP_HOSTS (plain ssh),
  #   2. every host in AZURE_HOSTS except AZURE_MASTER (plain ssh),
  #   3. AZURE_MASTER last (through ssh_remote).
  #
  # Globals: GCP_HOSTS, AZURE_HOSTS, AZURE_MASTER (read); host (written).
  for host in "${GCP_HOSTS[@]}"; do
    ssh "$host" sudo sv stop /opt/gitlab/sv/repmgrd
  done

  for host in "${AZURE_HOSTS[@]}"; do
    # The master is handled separately once all standbys are done.
    if [[ "$host" != "$AZURE_MASTER" ]]; then
      ssh "$host" sudo sv stop /opt/gitlab/sv/repmgrd
    fi
  done

  ssh_remote "$AZURE_MASTER" sudo sv stop /opt/gitlab/sv/repmgrd
}
function 006_convert-azure-master-to-standby(){
function convert-azure-master-to-standby(){
ssh_remote "$AZURE_MASTER" $(cat << EOF
echo "
standby_mode = 'on'
......@@ -56,7 +112,7 @@ EOF
)
}
function 007_check-gcp-nodes-has-same-azure-lsn(){
function check-gcp-nodes-has-same-azure-lsn(){
while true
do
azure_master_lsn="$(ssh_remote "$AZURE_MASTER" $(cat << EOF
......@@ -85,48 +141,53 @@ EOF
done
}
function 008_perform-gcp-candidate-master-promote(){
function perform-gcp-candidate-master-promote(){
ssh_remote "$GCP_MASTER_CANDIDATE" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql /opt/gitlab/embedded/bin/pg_ctl \
promote -D /var/lib/opt/gitlab/postgresql/data;
EOF
)
)
}
function 009_check-gcp-candidate-master-is-master(){
function check-gcp-candidate-master-is-master(){
# Runs "select pg_is_in_recovery()" on $GCP_MASTER_CANDIDATE through ssh_remote
# (psql flags -t -A: tuples only, unaligned output) and pipes the output to
# grep. The function's exit status is grep's: 0 when the output contains the
# letter "f", non-zero otherwise.
# NOTE(review): the pattern matches an "f" anywhere in the output, not only a
# line that is exactly "f" - confirm ssh_remote prints nothing else on stdout.
# The command substitution is unquoted, so the heredoc text reaches ssh_remote
# as separate whitespace-split words.
ssh_remote "$GCP_MASTER_CANDIDATE" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
-t -A -c "select pg_is_in_recovery()";
EOF
) | grep -q 'f'
}
function 010_reduce-statement-timeout(){
return 0
}
function 011_configure-pgbouncer-for-gcp(){
return 0
}
function 012_ensure-priority-is-updated-in-repmgr(){
return 0
) | grep -q 'f'
}
function 013_update-chef-cookbook(){
return 0
function enable-automatic-failover-on-gcp-only(){
  # Re-enables automatic failover on the GCP side only by starting the repmgrd
  # runit service: first on GCP_MASTER_CANDIDATE, then on every other host in
  # GCP_HOSTS. Azure hosts are not touched.
  #
  # Fix: this step previously issued "sv stop" (copied from the disable step),
  # which left repmgrd stopped instead of enabling it.
  #
  # Globals: GCP_MASTER_CANDIDATE, GCP_HOSTS (read).
  local host

  ssh "$GCP_MASTER_CANDIDATE" sudo sv start /opt/gitlab/sv/repmgrd

  for host in "${GCP_HOSTS[@]}"; do
    # The candidate master was already started above.
    if [[ "$host" == "$GCP_MASTER_CANDIDATE" ]]; then
      continue
    fi
    ssh "$host" sudo sv start /opt/gitlab/sv/repmgrd
  done
}
function 014_enable-automatic-failover-on-gcp-only(){
return 0
}
function enable-consul-on-gcp-only(){
for host in "${GCP_HOSTS[@]}"
do
ssh "$host" sudo sv start /opt/gitlab/sv/consul
done
function 015_enable-consul-on-gcp-only(){
return 0
for host in "${GCP_PGBOUNCERS[@]}"
do
ssh "$host" sudo sv start /opt/gitlab/sv/consul
done
}
function 016_enable-chef-on-gcp-only(){
return 0
function enable-chef-on-gcp-only(){
  # Re-enables chef on the GCP hosts only: renames /etc/chef.migration back to
  # /etc/chef and starts the chef-client service on every host in GCP_HOSTS.
  #
  # Fix: the loop previously also covered AZURE_HOSTS, contradicting the
  # "gcp-only" step name; Azure hosts now keep chef disabled.
  #
  # Globals: GCP_HOSTS (read).
  local host

  for host in "${GCP_HOSTS[@]}"; do
    echo "starting chef on $host"
    ssh_remote "$host" sudo mv /etc/chef.migration /etc/chef
    ssh_remote "$host" sudo service chef-client start
  done
}
This diff is collapsed.
......@@ -15,7 +15,7 @@ function step_number(){
printf "%d" "$(step_3digit_number "$1"|sed 's/0\+//')"
}
function step_name(){
function step_script(){
  # Prints the script name of a step identifier: everything after the first
  # "_" (e.g. "005_convert-azure-master-to-standby" ->
  # "convert-azure-master-to-standby"). An identifier without "_" is printed
  # unchanged.
  local step_id="$1"
  cut -d _ -f 2- <<< "$step_id"
}
......@@ -40,7 +40,7 @@ function do_menu(){
echo
for step in "${steps[@]}"
do
echo "$(step_3digit_number "$step")) $(step_name "$step")"
echo "$(step_3digit_number "$step")) $(step_script "$step")"
done
echo
if has_step "$next_step"
......@@ -97,7 +97,7 @@ function do_menu(){
if [ "$key" == "y" ]
then
next_step="$(printf "%03d" "$(($(step_number "$step")+1))")"
do_step "$step"
do_step "$(step_script "$step")"
fi
echo
step=
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment