Commit 89c85492 authored by Matteo Melli
Browse files

Sync steps_staging and steps_production

parent 860c03fe
......@@ -8,7 +8,7 @@ set -eu
# The first positional argument selects the environment; it is exported under
# both ENVIRONMENT and GITLAB_ENV.
export ENVIRONMENT=$1
export GITLAB_ENV=$ENVIRONMENT
source .env_${ENVIRONMENT} # That is, .env_staging or .env_production (test also supported)
source env_${ENVIRONMENT} # That is, env_staging or env_production (test also supported)
# Presumably provides shared helpers such as ssh_remote — TODO confirm.
source utilities
# Per-environment steps file (steps_staging / steps_production).
source steps_${ENVIRONMENT}
......
......@@ -13,12 +13,13 @@ export steps=(
011_check-repmgr-master
012_enable-consul-on-gcp-only
013_enable-chef-on-gcp-only
014_restore-could-not-change-directory-to-message
)
# Adds execute (search) permission to the remote $HOME on every Azure host.
# Presumably this lets commands run as another user enter that directory so the
# "could not change directory to" message disappears (inferred from the
# function name — confirm).
# Globals:   AZURE_HOSTS (read) - iterated as an array of host names
# Calls:     ssh_remote <host> <command...> (defined outside this block)
# NOTE(review): if ssh_remote hands its arguments to ssh unchanged, the remote
# shell re-splits them and `bash -c` receives only `chmod` as its script —
# confirm the helper preserves the quoting.
function get-rid-of-could-not-change-directory-to-message(){
  local host
  # Walk every element: a bare "$AZURE_HOSTS" expands to element 0 only, while
  # the other loops in this file iterate "${AZURE_HOSTS[@]}".
  for host in "${AZURE_HOSTS[@]}"
  do
    # Single quotes keep $HOME from being expanded by the local shell.
    # NOTE(review): o+x is already covered by a+x; both lines are kept as found.
    ssh_remote "$host" bash -c 'chmod a+x "$HOME"'
    ssh_remote "$host" bash -c 'chmod o+x "$HOME"'
  done
}
......@@ -211,4 +212,11 @@ function check-repmgr-master(){
>&2 echo "$GCP_MASTER_CANDIDATE is not repmgr master"
return 1
fi
}
\ No newline at end of file
}
# Removes the "others" execute (search) bit from the remote $HOME on every
# Azure host, i.e. the inverse of the `chmod o+x` issued by
# get-rid-of-could-not-change-directory-to-message.
# Globals:   AZURE_HOSTS (read) - iterated as an array of host names
# Calls:     ssh_remote <host> <command...> (defined outside this block)
function restore-could-not-change-directory-to-message(){
  local host
  # Walk every element: a bare "$AZURE_HOSTS" expands to element 0 only, while
  # the other loops in this file iterate "${AZURE_HOSTS[@]}".
  for host in "${AZURE_HOSTS[@]}"
  do
    # Single quotes keep $HOME from being expanded by the local shell.
    ssh_remote "$host" bash -c 'chmod o-x "$HOME"'
  done
}
# List of steps, each entry written as NNN_<function-name>; the names after the
# numeric prefix match functions defined in this file.
# NOTE(review): two numbering sequences appear back to back (000-011, then
# 000-014). This looks like the previous and the current version of the list
# merged together — confirm which entries are meant to remain.
export steps=(
000_create-tombstone-table
001_check-gcp-replication-delay
002_disable-chef
003_disable-consul
004_disable-automatic-failover
005_convert-azure-master-to-standby
006_check-gcp-nodes-has-same-azure-lsn
007_perform-gcp-candidate-master-promote
008_check-gcp-candidate-master-is-master
009_enable-automatic-failover-on-gcp-only
010_enable-consul-on-gcp-only
011_enable-chef-on-gcp-only
# Second sequence starts here.
000_get-rid-of-could-not-change-directory-to-message
001_create-tombstone-table
002_check-gcp-replication-delay
003_disable-chef
004_disable-consul
005_disable-automatic-failover
006_convert-azure-master-to-standby
007_check-gcp-nodes-has-same-azure-lsn
008_perform-gcp-candidate-master-promote
009_check-gcp-candidate-master-is-master
010_enable-automatic-failover-on-gcp-only
011_check-repmgr-master
012_enable-consul-on-gcp-only
013_enable-chef-on-gcp-only
014_restore-could-not-change-directory-to-message
)
# Grants "others" execute (search) permission on the remote $HOME of every
# Azure host. Presumably this lets commands run as another user enter that
# directory so the "could not change directory to" message disappears
# (inferred from the function name — confirm).
# Globals:   AZURE_HOSTS (read) - iterated as an array of host names
# Calls:     ssh_remote <host> <command...> (defined outside this block)
function get-rid-of-could-not-change-directory-to-message(){
  local host
  # Walk every element: a bare "$AZURE_HOSTS" expands to element 0 only, while
  # the other loops in this file iterate "${AZURE_HOSTS[@]}".
  for host in "${AZURE_HOSTS[@]}"
  do
    # Single quotes keep $HOME from being expanded by the local shell.
    ssh_remote "$host" bash -c 'chmod o+x "$HOME"'
  done
}
# Drops and re-creates the `tombstone` database on $AZURE_MASTER and creates the
# `tombstone` table in it, by handing the here-document text below to ssh_remote.
# Globals: AZURE_MASTER (read)
function create-tombstone-table(){
# Postgres has no "create database if not exists", so to keep this step
# idempotent and independent of earlier actions the tombstone database and
# table are re-created from scratch.
# NOTE(review): the substitution is unquoted, so the here-document text is
# word-split into separate arguments for ssh_remote. The body also contains
# `echo` and `ssh_remote` lines that are sent as text rather than run locally,
# and a line ending in a backslash that joins the `echo` onto the previous
# command. This looks like two versions of the function merged together —
# confirm the intended body.
ssh_remote "$AZURE_MASTER" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
-c "drop database if exists tombstone; create database tombstone";
sudo -u gitlab-psql gitlab-psql tombstone \
echo "Create tombstone database and table if not already existing"
ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql postgres \
-c "drop database if exists tombstone; create database tombstone"
ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql tombstone \
-c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"
EOF
)
}
function check-gcp-replication-delay(){
tombstone_msg=$(date +'%Y%m%d_%H%M%S')"_${ENVIRONMENT}"
ssh_remote "$AZURE_MASTER" \
"cd /tmp; sudo -u gitlab-psql gitlab-psql tombstone -c \"insert into tombstone(note) values('${tombstone_msg}') returning *\""
echo "Insert '$tombstone_msg' into tombstone"
ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql tombstone -c "insert into tombstone(note) values('${tombstone_msg}') returning *"
# wait until the change is propagated
while [[ true ]]; do
find_new_msg=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd tombstone -c \"select created_at from tombstone where note = '$tombstone_msg'\""
)
if [[ -z ${find_new_msg+x} ]] || [[ "$find_new_msg" == "" ]]; then
gcp_cur_rep_delay=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd postgres -c 'select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))'"
)
while true
do
find_new_msg="$(ssh_remote "$GCP_MASTER_CANDIDATE" sudo gitlab-psql -Atd tombstone -c "select created_at from tombstone where note = '$tombstone_msg'")"
if [[ -z ${find_new_msg+x} ]] || [[ "$find_new_msg" == "" ]]
then
gcp_cur_rep_delay="$(ssh_remote "$GCP_MASTER_CANDIDATE"
sudo gitlab-psql -Atd postgres -c "select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))")"
echo "New tombstone message is not seen on $GCP_MASTER_CANDIDATE (GCP MASTER CANDIDATE). The replication delay: ${gcp_cur_rep_delay}s. Wait 3 seconds..."
sleep 3
else
......@@ -53,9 +53,8 @@ function check-gcp-replication-delay(){
}
function disable-chef(){
# chef
for host in "${AZURE_HOSTS[@]}" "${GCP_HOSTS[@]}"; do
echo "stopping chef on $host"
echo "Stopping chef on $host"
ssh_remote "$host" sudo service chef-client stop
ssh_remote "$host" sudo mv /etc/chef /etc/chef.migration
done
......@@ -64,28 +63,33 @@ function disable-chef(){
# Stops the consul service (/opt/gitlab/sv/consul) on four host groups, in this
# order: Azure pgbouncers, GCP pgbouncers, Azure hosts, GCP hosts.
# Globals: AZURE_PGBOUNCERS, GCP_PGBOUNCERS, AZURE_HOSTS, GCP_HOSTS (read)
# NOTE(review): every host receives the stop command twice, once through plain
# `ssh` and once through `ssh_remote`; this looks like an old and a new version
# of each loop body merged together — confirm whether the plain `ssh` call
# should stay.
function disable-consul(){
  # Azure pgbouncers
  for host in "${AZURE_PGBOUNCERS[@]}"; do
    ssh "$host" sudo sv stop /opt/gitlab/sv/consul
    echo "Stopping consul on $host"
    ssh_remote "$host" sudo sv stop /opt/gitlab/sv/consul
  done

  # GCP pgbouncers
  for host in "${GCP_PGBOUNCERS[@]}"; do
    ssh "$host" sudo sv stop /opt/gitlab/sv/consul
    echo "Stopping consul on $host"
    ssh_remote "$host" sudo sv stop /opt/gitlab/sv/consul
  done

  # Azure hosts
  for host in "${AZURE_HOSTS[@]}"; do
    ssh "$host" sudo sv stop /opt/gitlab/sv/consul
    echo "Stopping consul on $host"
    ssh_remote "$host" sudo sv stop /opt/gitlab/sv/consul
  done

  # GCP hosts
  for host in "${GCP_HOSTS[@]}"; do
    ssh "$host" sudo sv stop /opt/gitlab/sv/consul
    echo "Stopping consul on $host"
    ssh_remote "$host" sudo sv stop /opt/gitlab/sv/consul
  done
}
function disable-automatic-failover(){
for host in "${GCP_HOSTS[@]}"
do
ssh "$host" sudo sv stop /opt/gitlab/sv/repmgrd
echo "Stopping repmgrd on $host"
ssh_remote "$host" sudo sv stop /opt/gitlab/sv/repmgrd
done
for host in "${AZURE_HOSTS[@]}"
do
......@@ -93,44 +97,35 @@ function disable-automatic-failover(){
then
continue
fi
ssh "$host" sudo sv stop /opt/gitlab/sv/repmgrd
echo "Stopping repmgrd on $host"
ssh_remote "$host" sudo sv stop /opt/gitlab/sv/repmgrd
done
echo "Stopping consul on $AZURE_MASTER"
ssh_remote "$AZURE_MASTER" sudo sv stop /opt/gitlab/sv/repmgrd
}
# Writes a recovery.conf (standby_mode = 'on', recovery_target_timeline =
# 'latest') into the PostgreSQL data directory on $AZURE_MASTER, sets its owner
# and mode, then stops the postgres service there.
# Globals: AZURE_MASTER (read)
# NOTE(review): the same sequence appears twice — first as here-document text
# handed to ssh_remote in a single call, then as individual ssh_remote calls.
# This looks like an old and a new version merged together — confirm which one
# should remain.
function convert-azure-master-to-standby(){
# Variant 1: the whole sequence as here-document text (word-split, because the
# command substitution is unquoted).
ssh_remote "$AZURE_MASTER" $(cat << EOF
echo "
standby_mode = 'on'
recovery_target_timeline = 'latest'" > /var/lib/opt/gitlab/postgresql/data/recovery.conf
sudo chown postgres:postgres /var/lib/opt/gitlab/postgresql/data/recovery.conf
sudo chmod 600 /var/lib/opt/gitlab/postgresql/data/recovery.conf
sudo sv -W 1 stop /opt/gitlab/sv/postgres \
|| (sudo sv int /opt/gitlab/sv/postgres \
&& sudo sv -W 60 stop /opt/gitlab/sv/postgres)
EOF
)
# Variant 2: the file content is piped over ssh_remote into `sudo tee`, which
# writes it on the remote side.
echo "standby_mode = 'on'
recovery_target_timeline = 'latest'" | \
ssh_remote "$AZURE_MASTER" sudo tee /var/lib/opt/gitlab/postgresql/data/recovery.conf
# Owner postgres:postgres, mode 600.
ssh_remote "$AZURE_MASTER" sudo chown postgres:postgres /var/lib/opt/gitlab/postgresql/data/recovery.conf
ssh_remote "$AZURE_MASTER" sudo chmod 600 /var/lib/opt/gitlab/postgresql/data/recovery.conf
# First stop attempt uses -W 1; if it fails, send `int` and retry the stop with -W 60.
ssh_remote "$AZURE_MASTER" sudo sv -W 1 stop /opt/gitlab/sv/postgres \
|| (ssh_remote "$AZURE_MASTER" sudo sv int /opt/gitlab/sv/postgres \
&& ssh_remote "$AZURE_MASTER" sudo sv -W 60 stop /opt/gitlab/sv/postgres)
}
function check-gcp-nodes-has-same-azure-lsn(){
while true
do
azure_master_lsn="$(ssh_remote "$AZURE_MASTER" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
azure_master_lsn="$(ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql postgres \
-t -A -c "select case when pg_is_in_recovery()
then pg_last_xlog_replay_location()
else pg_current_xlog_location() end;";
EOF
))"
gcp_master_candidate_lsn="$(ssh_remote "$GCP_MASTER_CANDIDATE" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
else pg_current_xlog_location() end;")"
gcp_master_candidate_lsn="$(ssh_remote "$GCP_MASTER_CANDIDATE" sudo -u gitlab-psql gitlab-psql postgres \
-t -A -c "select case when pg_is_in_recovery()
then pg_last_xlog_replay_location()
else pg_current_xlog_location() end;";
EOF
))"
else pg_current_xlog_location() end;")"
if [ "$azure_master_lsn" == "$gcp_master_candidate_lsn" ]
then
echo "GCP and Azure have same LSN: $azure_master_lsn"
......@@ -142,52 +137,86 @@ EOF
}
# Runs `pg_ctl promote` as gitlab-psql against the PostgreSQL data directory on
# $GCP_MASTER_CANDIDATE.
# Globals: GCP_MASTER_CANDIDATE (read)
# NOTE(review): the promote is issued twice — once as word-split here-document
# text prefixed with `cd /tmp;`, once as a direct argument list. This looks like
# an old and a new version merged together — confirm which one should remain.
function perform-gcp-candidate-master-promote(){
  # The here-document body and its terminator must stay at column 0.
  ssh_remote "$GCP_MASTER_CANDIDATE" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql /opt/gitlab/embedded/bin/pg_ctl \
promote -D /var/lib/opt/gitlab/postgresql/data;
EOF
)

  # Same promote command, passed to ssh_remote as discrete arguments.
  local -a promote_cmd=(
    sudo -u gitlab-psql /opt/gitlab/embedded/bin/pg_ctl
    promote -D /var/lib/opt/gitlab/postgresql/data
  )
  ssh_remote "$GCP_MASTER_CANDIDATE" "${promote_cmd[@]}"
}
# Checks that $GCP_MASTER_CANDIDATE is no longer in recovery by running
# `select pg_is_in_recovery()` there and looking for an `f` in the output.
# Globals: GCP_MASTER_CANDIDATE (read)
# Outputs: "<host> is master" on stdout, or "<host> is not master" on stderr
# Returns: 0 when the output contains `f`, 1 otherwise
# NOTE(review): the query is run twice — first as a bare pipeline built from
# here-document text, then inside the `if`. This looks like an old and a new
# version merged together; the first pipeline is kept as found — confirm
# whether it should remain.
function check-gcp-candidate-master-is-master(){
  # The here-document body and its terminator must stay at column 0.
  ssh_remote "$GCP_MASTER_CANDIDATE" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
-t -A -c "select pg_is_in_recovery()";
EOF
) | grep -q 'f'
  if ssh_remote "$GCP_MASTER_CANDIDATE" sudo -u gitlab-psql gitlab-psql postgres \
    -t -A -c "select pg_is_in_recovery()" | grep -q 'f'
  then
    echo "$GCP_MASTER_CANDIDATE is master"
    return 0
  else
    # The failure branch used to print "is master" as well, which made the
    # error output indistinguishable from the success message.
    >&2 echo "$GCP_MASTER_CANDIDATE is not master"
    return 1
  fi
}
# Registers the GCP nodes with repmgr ($GCP_MASTER_CANDIDATE as master, every
# other entry of GCP_HOSTS as standby) and then starts repmgrd on them.
# Globals: GCP_MASTER_CANDIDATE (read), GCP_HOSTS (read, array)
# NOTE(review): the plain `ssh ... sv stop` calls sit next to the ssh_remote
# calls and look like leftovers of an earlier version of this function; they
# are kept as found — confirm whether they should remain.
function enable-automatic-failover-on-gcp-only(){
  local host
  ssh "$GCP_MASTER_CANDIDATE" sudo sv stop /opt/gitlab/sv/repmgrd
  echo "Register $GCP_MASTER_CANDIDATE as master with repmgr"
  ssh_remote "$GCP_MASTER_CANDIDATE" sudo gitlab-ctl repmgr register master
  for host in "${GCP_HOSTS[@]}"
  do
    # The candidate is already registered as master above.
    if [ "$GCP_MASTER_CANDIDATE" == "$host" ]
    then
      continue
    fi
    echo "Register $host as standby with repmgr"
    ssh_remote "$host" sudo gitlab-ctl repmgr register standby
  done
  echo "Starting repmgrd on $GCP_MASTER_CANDIDATE"
  # This used to run `sv stop`, contradicting both the message above and the
  # purpose of the step; the standby loop below already uses `sv start`.
  ssh_remote "$GCP_MASTER_CANDIDATE" sudo sv start /opt/gitlab/sv/repmgrd
  for host in "${GCP_HOSTS[@]}"
  do
    if [ "$GCP_MASTER_CANDIDATE" == "$host" ]
    then
      continue
    fi
    ssh "$host" sudo sv stop /opt/gitlab/sv/repmgrd
    echo "Starting repmgrd on $host"
    ssh_remote "$host" sudo sv start /opt/gitlab/sv/repmgrd
  done
}
# Starts the consul service (/opt/gitlab/sv/consul) on the GCP hosts first and
# on the GCP pgbouncers second.
# Globals: GCP_HOSTS, GCP_PGBOUNCERS (read)
# NOTE(review): every host receives the start command twice, once through plain
# `ssh` and once through `ssh_remote`; this looks like an old and a new version
# of each loop body merged together — confirm whether the plain `ssh` call
# should stay.
function enable-consul-on-gcp-only(){
  # GCP hosts
  for host in "${GCP_HOSTS[@]}"; do
    ssh "$host" sudo sv start /opt/gitlab/sv/consul
    echo "Starting consul agent on $host"
    ssh_remote "$host" sudo sv start /opt/gitlab/sv/consul
  done

  # GCP pgbouncers
  for host in "${GCP_PGBOUNCERS[@]}"; do
    ssh "$host" sudo sv start /opt/gitlab/sv/consul
    echo "Starting consul agent on $host"
    ssh_remote "$host" sudo sv start /opt/gitlab/sv/consul
  done
}
# Moves /etc/chef.migration back to /etc/chef and starts the chef-client
# service on each host.
# Globals: AZURE_HOSTS, GCP_HOSTS (read)
# NOTE(review): despite the "gcp-only" name the loop also covers AZURE_HOSTS —
# confirm this is intended. The two log lines per host look like an old and a
# new message merged together.
function enable-chef-on-gcp-only(){
  # Azure hosts first, GCP hosts second.
  local -a chef_hosts=("${AZURE_HOSTS[@]}" "${GCP_HOSTS[@]}")

  for host in "${chef_hosts[@]}"
  do
    echo "starting chef on $host"
    echo "Starting chef-client on $host"
    # Restore the configuration directory before starting the client.
    ssh_remote "$host" sudo mv /etc/chef.migration /etc/chef
    ssh_remote "$host" sudo service chef-client start
  done
}
# Runs `gitlab-ctl repmgr-check-master` as gitlab-consul on
# $GCP_MASTER_CANDIDATE and reports the outcome.
# Globals: GCP_MASTER_CANDIDATE (read)
# Outputs: a progress line plus "<host> is repmgr master" on stdout, or
#          "<host> is not repmgr master" on stderr
# Returns: 0 when the remote check succeeds, 1 otherwise
function check-repmgr-master(){
  echo "Checking state of $GCP_MASTER_CANDIDATE"

  # stderr of the remote check is discarded; only its exit status is used.
  if ! ssh_remote "$GCP_MASTER_CANDIDATE" sudo -u gitlab-consul gitlab-ctl repmgr-check-master 2> /dev/null; then
    echo "$GCP_MASTER_CANDIDATE is not repmgr master" >&2
    return 1
  fi

  echo "$GCP_MASTER_CANDIDATE is repmgr master"
  return 0
}
# Removes the "others" execute (search) bit from the remote $HOME on every
# Azure host, i.e. the inverse of the `chmod o+x` issued by
# get-rid-of-could-not-change-directory-to-message.
# Globals:   AZURE_HOSTS (read) - iterated as an array of host names
# Calls:     ssh_remote <host> <command...> (defined outside this block)
function restore-could-not-change-directory-to-message(){
  local host
  # Walk every element: a bare "$AZURE_HOSTS" expands to element 0 only, while
  # the other loops in this file iterate "${AZURE_HOSTS[@]}".
  for host in "${AZURE_HOSTS[@]}"
  do
    # Single quotes keep $HOME from being expanded by the local shell.
    ssh_remote "$host" bash -c 'chmod o-x "$HOME"'
  done
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment