Commit 73b4ca8a authored by Ahmad Sherif's avatar Ahmad Sherif
Browse files

Merge branch 'remove-probe-jobs' into 'master'

Remove Sidekiq `probe_jobs` probe

See merge request gitlab-org/gitlab-exporter!145
parents 5d27f4dd ef863d1e
Pipeline #203893 passed with stage
in 1 minute and 53 seconds
......@@ -275,7 +275,7 @@ module GitLab
::GitLab::Exporter::SidekiqProber.new(redis_url: @redis_url)
.probe_stats
.probe_queues
.probe_jobs
.probe_jobs_limit
.probe_workers
.probe_retries
.write_to(@target)
......
......@@ -7,9 +7,6 @@ module GitLab
#
# It takes the Redis URL Sidekiq is connected to
class SidekiqProber
QUEUE_JOB_STATS_SCRIPT = File.read(File.expand_path("#{__FILE__}/../sidekiq_queue_job_stats.lua")).freeze
QUEUE_JOB_STATS_SHA = Digest::SHA1.hexdigest(QUEUE_JOB_STATS_SCRIPT).freeze
# The maximum depth (from the head) of each queue to probe. Probing the
# entirety of a very large queue will take longer and run the risk of
# timing out. But when we have a very large queue, we are most in need of
......@@ -72,45 +69,19 @@ module GitLab
self
end
# Deprecated: probe_jobs previously used a Lua script to count every job
# in every Sidekiq queue. That script blocked all other Redis commands
# while it ran:
# https://redis.io/commands/eval#atomicity-of-scripts
#
# The probe has been removed and no longer emits any metrics. Use
# #probe_jobs_limit instead, which exports the same metric
# (sidekiq_enqueued_jobs) without blocking Redis.
#
# Returns self so it can stay in a prober call chain.
def probe_jobs
  puts "[REMOVED] probe_jobs is now considered obsolete and does not emit any metrics,"\
    " please use probe_jobs_limit instead"

  self
end
# Count worker classes present in Sidekiq queues. This only looks at the
# first PROBE_JOBS_LIMIT jobs in each queue. This means that we run a
# single LRANGE command for each queue, which does not block other
# commands. For queues over PROBE_JOBS_LIMIT in size, this means that we
# will not have completely accurate statistics, but the probe performance
# will also not degrade as the queue gets larger.
def probe_jobs_limit
with_sidekiq do
job_stats = Hash.new(0)
......
--
-- Adapted from https://github.com/mperham/sidekiq/blob/2f9258e4fe77991c526f7a65c92bcf792eef8338/lib/sidekiq/api.rb#L231
--
-- Counts the jobs in a single Sidekiq queue (a Redis list), grouped by
-- worker class. KEYS[1] is the queue's Redis key. Returns an array of
-- {class_name, count} pairs (cjson cannot round-trip an empty table as a
-- map, so an array of pairs is returned instead of a table).
--
-- NOTE(review): this runs via EVAL/EVALSHA, so the whole scan executes
-- atomically inside Redis and blocks all other commands while it runs.
local queue_name = KEYS[1]
local initial_size = redis.call('llen', queue_name)
-- Entries removed from the queue since the scan started (jobs picked up
-- by workers between pages). Used to shift the next LRANGE window so the
-- scan does not skip entries; mirrors the compensation in Sidekiq's
-- Queue#each (see link above).
local deleted_size = 0
local page = 0
local page_size = 2000
local temp_job_stats = {}   -- worker class name -> running count
local final_job_stats = {}  -- array of {class, count} pairs for the reply
-- Walk the list in windows of page_size entries until LRANGE comes back
-- empty.
while true do
local range_start = page * page_size - deleted_size
local range_end = range_start + page_size - 1
local entries = redis.call('lrange', queue_name, range_start, range_end)
if #entries == 0 then
break
end
page = page + 1
for index, entry in next, entries do
-- Each entry is a JSON-encoded job payload; 'class' is the worker class.
local class = cjson.decode(entry)['class']
if class ~= nil then
if temp_job_stats[class] ~= nil then
temp_job_stats[class] = temp_job_stats[class] + 1
else
temp_job_stats[class] = 1
end
end
end
-- Re-read the list length to detect concurrent removals. Assumes jobs
-- are only removed ahead of the current window -- TODO confirm this
-- holds for Sidekiq's dequeue pattern.
deleted_size = initial_size - redis.call('llen', queue_name)
end
-- Flatten the map into {class, count} pairs for the script reply.
for class, count in next, temp_job_stats do
local stat_entry = {class, count}
table.insert(final_job_stats, stat_entry)
end
return final_job_stats
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment