Commit 8b30abe1 authored by Emanuel Calvo

Added steps and continue working.

parent ec2288c6
Pipeline #88368 passed with stage
in 40 seconds
......@@ -21,6 +21,14 @@ postgres-03-db-gprd.c.gitlab-production.internal
postgres-01-db-gprd.c.gitlab-production.internal
postgres-04-db-gprd.c.gitlab-production.internal
)
# PgBouncer hosts on the Azure side of this environment.
AZURE_PGBOUNCER=(
  "pgbouncer-01.db.prd.gitlab.com"
  "pgbouncer-02.db.prd.gitlab.com"
)

# PgBouncer hosts on the GCP side of this environment.
GCP_PGBOUNCER=(
  "pgbouncer-01-db-gprd.c.gitlab-production.internal"
  "pgbouncer-02-db-gprd.c.gitlab-production.internal"
)
# Generic
......
......@@ -8,16 +8,34 @@ GCP_HOST_SUFFIX="-db-gstg.c.gitlab-staging-1.internal"
# The Azure master is host number 2: prefix + "2" + suffix.
AZURE_MASTER="${AZURE_HOST_PREFIX}2${AZURE_HOST_SUFFIX}"
# The GCP master candidate is likewise host number 2 on the GCP side.
GCP_MASTER_CANDIDATE="${GCP_HOST_PREFIX}2${GCP_HOST_SUFFIX}"
# Every Azure PostgreSQL host of this environment.
# (AZURE_SLAVES is a separate list defined further down.)
AZURE_HOSTS=(
  postgres01.db.stg.gitlab.com
  postgres02.db.stg.gitlab.com
)

# Every GCP PostgreSQL host of this environment.
# (GCP_SLAVES is a separate list defined further down.)
GCP_HOSTS=(
  postgres-02-db-gstg.c.gitlab-staging-1.internal
  postgres-01-db-gstg.c.gitlab-staging-1.internal
  postgres-03-db-gstg.c.gitlab-staging-1.internal
)
# Azure PostgreSQL hosts listed as slaves.
AZURE_SLAVES=(
  "postgres01.db.stg.gitlab.com"
)

# GCP PostgreSQL hosts listed as slaves.
GCP_SLAVES=(
  "postgres-02-db-gstg.c.gitlab-staging-1.internal"
  "postgres-03-db-gstg.c.gitlab-staging-1.internal"
)

# PgBouncer host on the Azure side.
AZURE_PGBOUNCER=(
  "pgbouncer-01.db.stg.gitlab.com"
)

# PgBouncer host on the GCP side.
GCP_PGBOUNCER=(
  "pgbouncer-01-db-gstg.c.gitlab-staging-1.internal"
)
# Generic
......
......@@ -22,17 +22,6 @@ export steps=(
)
#######################################
# (Re)create the "tombstone" database and its table on a remote host.
# "create database if not exists" is not supported in Postgres, so the
# database is dropped and created again to keep the action idempotent.
# Globals:
#   None
# Arguments:
#   $1 - host to run the commands on (passed to ssh_remote)
# Returns:
#   Exit status of ssh_remote
#######################################
create_tombstone() {
  local host="${1}"
  local -r psql="sudo -u gitlab-psql gitlab-psql"
  local remote_cmd="cd /tmp;"

  # DROP DATABASE / CREATE DATABASE cannot run inside a multi-command
  # string (psql sends one -c string as a single transaction), so each
  # statement gets its own psql invocation.
  remote_cmd+=" ${psql} postgres -c \"drop database if exists tombstone\";"
  remote_cmd+=" ${psql} postgres -c \"create database tombstone\";"
  remote_cmd+=" ${psql} tombstone -c \"create table if not exists tombstone (created_at timestamptz default now() primary key, note text)\""

  # Pass the remote command as ONE quoted argument, like the other
  # ssh_remote call sites, so it is not word-split or globbed locally.
  ssh_remote "${host}" "${remote_cmd}"
}
#######################################
# Check that GCP "main" replica is not lagging too much
# Globals:
......@@ -43,66 +32,106 @@ EOF
# Returns:
# None
#######################################
handle_gcp_replication_delay() { # put definition to the top
max_rep_delay=10 # TODO(NikolayS) is 10s ok? Double-check after turning SR on
# "create database if not exists" is not supported in Postgres,
# so to make the following action idempotent and not depending on the pre-actions,
# we better re-create tombstone DB and table from scratch
ssh_remote "$AZURE_MASTER" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
-c "drop database if exists tombstone";
sudo -u gitlab-psql gitlab-psql postgres \
-c "create database tombstone";
sudo -u gitlab-psql gitlab-psql tombstone \
-c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"
function 000_create-tombstone-table(){
# "create database if not exists" is not supported in Postgres,
# so to make the following action idempotent and not depending on the pre-actions,
# we better re-create tombstone DB and table from scratch
ssh_remote "$AZURE_MASTER" $(cat << EOF
cd /tmp;
sudo -u gitlab-psql gitlab-psql postgres \
-c "drop database if exists tombstone; create database tombstone";
sudo -u gitlab-psql gitlab-psql tombstone \
-c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"
EOF
)
tombstone_msg=$(date +'%Y%m%d_%H%M%S')"_${GITLAB_ENV}"
ssh_remote "$AZURE_MASTER" \
"cd /tmp; sudo -u gitlab-psql gitlab-psql tombstone -c \"insert into tombstone(note) values('${tombstone_msg}') returning *\""
# wait until the change is propagated
while [[ true ]]; do
find_new_msg=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd tombstone -c \"select created_at from tombstone where note = '$tombstone_msg'\""
)
if [[ -z ${find_new_msg+x} ]] || [[ "$find_new_msg" == "" ]]; then
gcp_cur_rep_delay=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd postgres -c 'select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))'"
)
echo "New tombstone message is not seen on $GCP_MASTER_CANDIDATE. The replication delay: ${gcp_cur_rep_delay}s. Wait 3 seconds..."
sleep 3
else
echo "New tombstone message arrived to $GCP_MASTER_CANDIDATE, continue."
break
fi
done
return 0
}
function 001_check-gcp-replication-delay(){
tombstone_msg=$(date +'%Y%m%d_%H%M%S')"_${ENVIRONMENT}"
ssh_remote "$AZURE_MASTER" \
"cd /tmp; sudo -u gitlab-psql gitlab-psql tombstone -c \"insert into tombstone(note) values('${tombstone_msg}') returning *\""
# wait until the change is propagated
while [[ true ]]; do
find_new_msg=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd tombstone -c \"select created_at from tombstone where note = '$tombstone_msg'\""
)
if [[ -z ${find_new_msg+x} ]] || [[ "$find_new_msg" == "" ]]; then
gcp_cur_rep_delay=$(
ssh_remote "$GCP_MASTER_CANDIDATE" \
"cd /tmp; sudo gitlab-psql -Atd postgres -c 'select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))'"
)
echo "New tombstone message is not seen on $GCP_MASTER_CANDIDATE (GCP MASTER CANDIDATE). The replication delay: ${gcp_cur_rep_delay}s. Wait 3 seconds..."
sleep 3
else
echo "New tombstone message arrived to $GCP_MASTER_CANDIDATE."
break
fi
done
function 000_create-tombstone-table(){
create_tombstone $AZURE_MASTER
return 0
}
function 001_check-gcp-replication-delay(){
return 0
}
#######################################
# Stop or start chef-client on every Azure and GCP database host,
# after an interactive confirmation.
# "stop" also moves /etc/chef to /etc/chef.migration; "start" moves it
# back before starting the service.
# Globals:
#   GITLAB_ENV, N_OF_HOSTS,
#   AZURE_HOST_PREFIX, AZURE_HOST_SUFFIX,
#   GCP_HOST_PREFIX, GCP_HOST_SUFFIX (all read)
# Arguments:
#   $1 - "stop" or "start"
# Returns:
#   0 when all hosts were processed,
#   1 on an invalid argument or when the operator did not confirm.
#######################################
function 002_disable-chef(){
  local action="${1:-}"
  local proceed_cmd host i

  # Validate the argument up front, before prompting or touching any host.
  case "$action" in
    stop|start) ;;
    *)
      >&2 echo "1st argument must be either \"stop\" or \"start\" (provided: $action)."
      return 1
      ;;
  esac

  echo "We're about to stop disable chef and stop repmgr on $GITLAB_ENV environment. To proceed, type '$GITLAB_ENV':"
  read -r proceed_cmd
  if [[ "$proceed_cmd" != "$GITLAB_ENV" ]]; then
    >&2 echo "Stop."
    # Non-zero so the caller can tell an abort from a completed step.
    return 1
  fi

  # chef
  for ((i = 1; i <= N_OF_HOSTS; i++)); do
    for host in "${AZURE_HOST_PREFIX}${i}${AZURE_HOST_SUFFIX}" "${GCP_HOST_PREFIX}${i}${GCP_HOST_SUFFIX}"; do
      echo "${action}ing chef on $host"
      # WARNING: the following lines modify the state
      if [[ "$action" == "stop" ]]; then
        ssh_remote "$host" "sudo service chef-client stop"
        ssh_remote "$host" "sudo mv /etc/chef /etc/chef.migration"
      else
        ssh_remote "$host" "sudo mv /etc/chef.migration /etc/chef"
        ssh_remote "$host" "sudo service chef-client start"
      fi
    done
  done
  return 0
}
#######################################
# Stop the consul service on every PgBouncer and PostgreSQL host,
# on both the Azure and the GCP side.
# Globals:
#   AZURE_PGBOUNCER, GCP_PGBOUNCER, AZURE_HOSTS, GCP_HOSTS (arrays, read)
# Arguments:
#   None
# Returns:
#   0 always; failures of individual ssh calls are not propagated.
#######################################
function 003_disable-consul(){
  local host

  # The host lists are arrays: a bare $NAME expands to element 0 only,
  # so "${NAME[@]}" is required to visit every host.
  # NOTE(review): other steps go through ssh_remote; this one calls ssh
  # directly -- confirm that is intentional.
  for host in \
    "${AZURE_PGBOUNCER[@]}" \
    "${GCP_PGBOUNCER[@]}" \
    "${AZURE_HOSTS[@]}" \
    "${GCP_HOSTS[@]}"; do
    ssh "$host" sudo sv stop /opt/gitlab/sv/consul
  done
  return 0
}
#
#
#
#######################################
# Run an `sv` action against the repmgrd service on the Azure master.
# Globals:
#   AZURE_MASTER (read)
# Arguments:
#   $1 - action word handed to `sv` (inserted verbatim into the command)
# Returns:
#   0 always
#######################################
function 004_disable-automatic-failover(){
  local sv_action="$1"
  local remote_cmd="sudo sv ${sv_action} /opt/gitlab/sv/repmgrd"

  ssh_remote "${AZURE_MASTER}" "${remote_cmd}"
  return 0
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment