Commit 985b041a authored by Nikolay
Browse files

reworked tombstone and replication delay handling

parent 687940de
Pipeline #88338 passed with stage
in 17 seconds
......@@ -378,33 +378,47 @@ state of the secondary to converge.
# Returns:
# None
#######################################
handle_gcp_replication_delay() { # TODO: move this definition to the top
  # Writes a uniquely tagged "tombstone" row on $AZURE_MASTER and then polls
  # $GCP_MASTER_CANDIDATE every 3 seconds until that row can be read back
  # there, printing the candidate's current replay delay while it waits.
  #
  # Globals read:    AZURE_MASTER, GCP_MASTER_CANDIDATE, GITLAB_ENV
  # Globals written: tombstone_msg, find_new_msg, gcp_cur_rep_delay
  #                  (deliberately not `local`; they were shell-global before)
  # Status:          0 once the row is visible on the candidate;
  #                  1 if preparing the tombstone DB or inserting the row fails.
  local recreate_cmd

  # "create database if not exists" is not supported in Postgres,
  # so to make the following action idempotent and not depending on the
  # pre-actions, we better re-create tombstone DB and table from scratch.
  # The command is handed over as ONE quoted argument (like every other
  # ssh_remote call here) so the local shell never word-splits or globs it.
  recreate_cmd='cd /tmp;'
  recreate_cmd+=' sudo -u gitlab-psql gitlab-psql postgres -c "drop database if exists tombstone";'
  recreate_cmd+=' sudo -u gitlab-psql gitlab-psql postgres -c "create database tombstone";'
  recreate_cmd+=' sudo -u gitlab-psql gitlab-psql tombstone'
  recreate_cmd+=' -c "create table if not exists tombstone (created_at timestamptz default now() primary key, note text)"'
  if ! ssh_remote "$AZURE_MASTER" "$recreate_cmd"; then
    echo "Failed to re-create the tombstone database on $AZURE_MASTER" >&2
    return 1
  fi

  # Timestamp + environment name makes the marker unique for this run.
  tombstone_msg=$(date +'%Y%m%d_%H%M%S')"_${GITLAB_ENV}"
  if ! ssh_remote "$AZURE_MASTER" \
    "cd /tmp; sudo -u gitlab-psql gitlab-psql tombstone -c \"insert into tombstone(note) values('${tombstone_msg}') returning *\""; then
    # Without the row on the primary the wait below could never finish.
    echo "Failed to insert the tombstone message on $AZURE_MASTER" >&2
    return 1
  fi

  # wait until the change is propagated
  while true; do
    # A failed or empty lookup is treated as "not replicated yet" and retried.
    find_new_msg=$(
      ssh_remote "$GCP_MASTER_CANDIDATE" \
        "cd /tmp; sudo gitlab-psql -Atd tombstone -c \"select created_at from tombstone where note = '$tombstone_msg'\""
    )
    if [[ -n "$find_new_msg" ]]; then
      echo "New tombstone message arrived to $GCP_MASTER_CANDIDATE, continue."
      return 0
    fi

    # Informational only: seconds since the last transaction replayed there.
    gcp_cur_rep_delay=$(
      ssh_remote "$GCP_MASTER_CANDIDATE" \
        "cd /tmp; sudo gitlab-psql -Atd postgres -c 'select round(extract(epoch from (now() - pg_last_xact_replay_timestamp())))'"
    )
    echo "New tombstone message is not seen on $GCP_MASTER_CANDIDATE. The replication delay: ${gcp_cur_rep_delay}s. Wait 3 seconds..."
    sleep 3
  done
}

handle_gcp_replication_delay
```
1. [ ] 🐺 {+ Coordinator +}: Now disable all sidekiq-cron jobs on the secondary
* In a dedicated rails console on the **secondary**:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment