Commit 6e94aed4 authored by Matteo Melli

Fixes and improvements

parent 233a83ad
Pipeline #88388 passed with stage
in 20 seconds
......@@ -9,40 +9,33 @@ export steps=(
007_perform-gcp-candidate-master-promote
008_check-gcp-candidate-master-is-master
009_enable-automatic-failover-on-gcp-only
010_check-repmgr-master
010_enable-consul-on-gcp-only
011_enable-chef-on-gcp-only
)
function create-tombstone-table(){
  # (Re)creates the "tombstone" database and its "tombstone" table on
  # $AZURE_MASTER.
  #
  # Globals:  AZURE_MASTER (read)
  # Outputs:  a progress line on stdout, plus whatever ssh_remote prints
  # Returns:  0 on success, otherwise the status of the first failing
  #           ssh_remote call
  #
  # "create database if not exists" is not supported in Postgres,
  # so to keep this step idempotent and independent of any pre-actions,
  # the tombstone DB is dropped and re-created from scratch.
  echo "Create tombstone database and table if not already existing"

  # DROP DATABASE and CREATE DATABASE are issued as separate psql invocations:
  # Postgres refuses to run either of them from a multi-command -c string.
  ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql postgres \
    -c "drop database if exists tombstone" || return
  ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql postgres \
    -c "create database tombstone" || return
  ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql tombstone \
    -c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"
}
function check-gcp-replication-delay(){
tombstone_msg=$(date +'%Y%m%d_%H%M%S')"_${ENVIRONMENT}"
ssh_remote "$AZURE_MASTER" \
"cd /tmp; sudo -u gitlab-psql gitlab-psql tombstone -c \"insert into tombstone(note) values('${tombstone_msg}') returning *\""
echo "Insert '$tombstone_msg' into tombstone"
ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql tombstone -c "insert into tombstone(note) values('${tombstone_msg}') returning *"
# wait until the change is propagated
while [[ true ]]; do
while true
do
find_new_msg=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd tombstone -c \"select created_at from tombstone where note = '$tombstone_msg'\""
)
if [[ -z ${find_new_msg+x} ]] || [[ "$find_new_msg" == "" ]]; then
ssh_remote "$GCP_MASTER_CANDIDATE" sudo gitlab-psql -Atd tombstone -c "select created_at from tombstone where note = '$tombstone_msg'"
if [[ -z ${find_new_msg+x} ]] || [[ "$find_new_msg" == "" ]]
then
gcp_cur_rep_delay=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd postgres -c 'select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))'"
)
ssh_remote "$GCP_MASTER_CANDIDATE"
sudo gitlab-psql -Atd postgres -c "select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))"
echo "New tombstone message is not seen on $GCP_MASTER_CANDIDATE (GCP MASTER CANDIDATE). The replication delay: ${gcp_cur_rep_delay}s. Wait 3 seconds..."
sleep 3
else
......@@ -53,9 +46,8 @@ function check-gcp-replication-delay(){
}
function disable-chef(){
# chef
for host in "${AZURE_HOSTS[@]}" "${GCP_HOSTS[@]}"; do
echo "stopping chef on $host"
echo "Stopping chef on $host"
ssh_remote "$host" sudo service chef-client stop
ssh_remote "$host" sudo mv /etc/chef /etc/chef.migration
done
......@@ -64,28 +56,33 @@ function disable-chef(){
function disable-consul(){
  # Stops the consul runit service (/opt/gitlab/sv/consul) on every host in
  # AZURE_PGBOUNCERS, GCP_PGBOUNCERS, AZURE_HOSTS and GCP_HOSTS, in that order.
  #
  # Globals:  AZURE_PGBOUNCERS, GCP_PGBOUNCERS, AZURE_HOSTS, GCP_HOSTS (read)
  # Outputs:  one "Stopping consul on <host>" line per host on stdout
  # Returns:  the status of the last ssh_remote call
  #
  # Each host is contacted exactly once, and only through ssh_remote, so the
  # step behaves like the other steps that already use that wrapper.
  local host
  for host in "${AZURE_PGBOUNCERS[@]}" "${GCP_PGBOUNCERS[@]}" \
              "${AZURE_HOSTS[@]}" "${GCP_HOSTS[@]}"
  do
    echo "Stopping consul on $host"
    ssh_remote "$host" sudo sv stop /opt/gitlab/sv/consul
  done
}
function disable-automatic-failover(){
for host in "${GCP_HOSTS[@]}"
do
ssh "$host" sudo sv stop /opt/gitlab/sv/repmgrd
echo "Stopping repmgrd on $host"
ssh_remote "$host" sudo sv stop /opt/gitlab/sv/repmgrd
done
for host in "${AZURE_HOSTS[@]}"
do
......@@ -93,44 +90,35 @@ function disable-automatic-failover(){
then
continue
fi
ssh "$host" sudo sv stop /opt/gitlab/sv/repmgrd
echo "Stopping repmgrd on $host"
ssh_remote "$host" sudo sv stop /opt/gitlab/sv/repmgrd
done
echo "Stopping consul on $AZURE_MASTER"
ssh_remote "$AZURE_MASTER" sudo sv stop /opt/gitlab/sv/repmgrd
}
function convert-azure-master-to-standby(){
  # Writes a standby recovery.conf on $AZURE_MASTER, fixes its ownership and
  # mode, then stops the postgres runit service there.
  #
  # Globals:  AZURE_MASTER (read)
  # Outputs:  whatever ssh_remote prints (including the output of `tee`)
  # Returns:  0 when postgres was stopped, otherwise the status of the
  #           failing fallback command
  local recovery_conf=/var/lib/opt/gitlab/postgresql/data/recovery.conf

  # The file content is streamed over stdin into `sudo tee`, so the write
  # itself happens with elevated privileges on the remote side.
  printf '%s\n' "standby_mode = 'on'" "recovery_target_timeline = 'latest'" \
    | ssh_remote "$AZURE_MASTER" sudo tee "$recovery_conf"
  ssh_remote "$AZURE_MASTER" sudo chown postgres:postgres "$recovery_conf"
  ssh_remote "$AZURE_MASTER" sudo chmod 600 "$recovery_conf"

  # First try a stop with a 1 second wait; if that fails, send an interrupt
  # and retry the stop with a 60 second wait.
  if ! ssh_remote "$AZURE_MASTER" sudo sv -W 1 stop /opt/gitlab/sv/postgres
  then
    ssh_remote "$AZURE_MASTER" sudo sv int /opt/gitlab/sv/postgres \
      && ssh_remote "$AZURE_MASTER" sudo sv -W 60 stop /opt/gitlab/sv/postgres
  fi
}
function check-gcp-nodes-has-same-azure-lsn(){
while true
do
azure_master_lsn="$(ssh_remote "$AZURE_MASTER" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
azure_master_lsn="$(ssh_remote "$AZURE_MASTER" sudo -u gitlab-psql gitlab-psql postgres \
-t -A -c "select case when pg_is_in_recovery()
then pg_last_xlog_replay_location()
else pg_current_xlog_location() end;";
EOF
))"
gcp_master_candidate_lsn="$(ssh_remote "$GCP_MASTER_CANDIDATE" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
else pg_current_xlog_location() end;")"
gcp_master_candidate_lsn="$(ssh_remote "$GCP_MASTER_CANDIDATE" sudo -u gitlab-psql gitlab-psql postgres \
-t -A -c "select case when pg_is_in_recovery()
then pg_last_xlog_replay_location()
else pg_current_xlog_location() end;";
EOF
))"
else pg_current_xlog_location() end;")"
if [ "$azure_master_lsn" == "$gcp_master_candidate_lsn" ]
then
echo "GCP and Azure have same LSN: $azure_master_lsn"
......@@ -151,43 +139,62 @@ EOF
}
function check-gcp-candidate-master-is-master(){
  # Verifies that Postgres on $GCP_MASTER_CANDIDATE is no longer in recovery,
  # i.e. "select pg_is_in_recovery()" prints "f".
  #
  # Globals:  GCP_MASTER_CANDIDATE (read)
  # Outputs:  "<host> is master" on stdout, or "<host> is not master" on stderr
  # Returns:  0 when the candidate reports it is not in recovery, 1 otherwise
  if ssh_remote "$GCP_MASTER_CANDIDATE" sudo -u gitlab-psql gitlab-psql postgres \
      -t -A -c "select pg_is_in_recovery()" | grep -q 'f'
  then
    echo "$GCP_MASTER_CANDIDATE is master"
    return 0
  else
    # Failure branch: the message must say the opposite of the success branch.
    >&2 echo "$GCP_MASTER_CANDIDATE is not master"
    return 1
  fi
}
function enable-automatic-failover-on-gcp-only(){
  # Starts the repmgrd runit service on the GCP hosts: first on
  # $GCP_MASTER_CANDIDATE, then on every other entry of GCP_HOSTS.
  #
  # Globals:  GCP_MASTER_CANDIDATE, GCP_HOSTS (read)
  # Outputs:  one "Starting repmgrd on <host>" line per host on stdout
  # Returns:  the status of the last command executed
  local host

  echo "Starting repmgrd on $GCP_MASTER_CANDIDATE"
  ssh_remote "$GCP_MASTER_CANDIDATE" sudo sv start /opt/gitlab/sv/repmgrd

  for host in "${GCP_HOSTS[@]}"
  do
    # The candidate was already handled above.
    if [ "$GCP_MASTER_CANDIDATE" == "$host" ]
    then
      continue
    fi
    echo "Starting repmgrd on $host"
    ssh_remote "$host" sudo sv start /opt/gitlab/sv/repmgrd
  done
}
function enable-consul-on-gcp-only(){
  # Starts the consul runit service (/opt/gitlab/sv/consul) on every host in
  # GCP_HOSTS and then on every host in GCP_PGBOUNCERS.
  #
  # Globals:  GCP_HOSTS, GCP_PGBOUNCERS (read)
  # Outputs:  one "Starting consul agent on <host>" line per host on stdout
  # Returns:  the status of the last ssh_remote call
  #
  # Each host is contacted exactly once, and only through ssh_remote.
  local host
  for host in "${GCP_HOSTS[@]}" "${GCP_PGBOUNCERS[@]}"
  do
    echo "Starting consul agent on $host"
    ssh_remote "$host" sudo sv start /opt/gitlab/sv/consul
  done
}
function enable-chef-on-gcp-only(){
  # Restores /etc/chef from /etc/chef.migration and starts the chef-client
  # service on each host in AZURE_HOSTS and GCP_HOSTS.
  #
  # Globals:  AZURE_HOSTS, GCP_HOSTS (read)
  # Outputs:  one "Starting chef-client on <host>" line per host on stdout
  # Returns:  the status of the last ssh_remote call
  #
  # NOTE(review): despite the "-on-gcp-only" name, the loop also covers
  # AZURE_HOSTS. Kept as-is; confirm whether Azure hosts should be excluded.
  local host
  for host in "${AZURE_HOSTS[@]}" "${GCP_HOSTS[@]}"; do
    echo "Starting chef-client on $host"
    ssh_remote "$host" sudo mv /etc/chef.migration /etc/chef
    ssh_remote "$host" sudo service chef-client start
  done
}
function check-repmgr-master(){
  # Runs "gitlab-ctl repmgr-check-master" as gitlab-consul on
  # $GCP_MASTER_CANDIDATE and reports the outcome.
  #
  # Globals:  GCP_MASTER_CANDIDATE (read)
  # Outputs:  progress and success lines on stdout; failure line on stderr
  # Returns:  0 when the remote check succeeds, 1 otherwise
  echo "Checking state of $GCP_MASTER_CANDIDATE"

  # stderr of the remote check is discarded; only its exit status is used.
  ssh_remote "$GCP_MASTER_CANDIDATE" sudo -u gitlab-consul gitlab-ctl repmgr-check-master 2> /dev/null || {
    >&2 echo "$GCP_MASTER_CANDIDATE is not repmgr master"
    return 1
  }

  echo "$GCP_MASTER_CANDIDATE is repmgr master"
  return 0
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment