Commit 4eb63cce authored by Ahmad Sherif
Browse files

Merge branch 'sidekiq-jobs-limit' into 'master'

Add probe_jobs_limit probe for Sidekiq

See merge request gitlab-org/gitlab-exporter!143
parents 2fa2477d 38c4c422
Pipeline #202528 passed with stage
in 2 minutes and 2 seconds
...@@ -15,6 +15,21 @@ module GitLab ...@@ -15,6 +15,21 @@ module GitLab
@include_timestamp = include_timestamp @include_timestamp = include_timestamp
end end
class << self
  # Records +description+ as the help text for the metric called +name+.
  # The registry is created lazily on first use. Returns the stored
  # description.
  def describe(name, description)
    (@metric_descriptions ||= {}).store(name, description)
  end

  # Returns the help text recorded for +name+, or nil when no registry
  # exists yet or the metric has no recorded description.
  def description(name)
    return unless @metric_descriptions

    @metric_descriptions[name]
  end

  # Forgets every recorded description by swapping in a fresh, empty
  # registry. Returns that empty hash.
  def clear_descriptions
    @metric_descriptions = Hash.new
  end
end
def add(name, value, quantile = false, **labels) def add(name, value, quantile = false, **labels)
fail "value '#{value}' must be a number" unless value.is_a?(Numeric) fail "value '#{value}' must be a number" unless value.is_a?(Numeric)
...@@ -32,6 +47,8 @@ module GitLab ...@@ -32,6 +47,8 @@ module GitLab
buffer = "" buffer = ""
@metrics.each do |name, measurements| @metrics.each do |name, measurements|
buffer << "# HELP #{name} #{self.class.description(name)}\n" if self.class.description(name)
measurements.each do |measurement| measurements.each do |measurement|
buffer << name.to_s buffer << name.to_s
labels = (measurement[:labels] || {}).map { |label, value| "#{label}=\"#{value}\"" }.join(",") labels = (measurement[:labels] || {}).map { |label, value| "#{label}=\"#{value}\"" }.join(",")
......
...@@ -10,6 +10,13 @@ module GitLab ...@@ -10,6 +10,13 @@ module GitLab
QUEUE_JOB_STATS_SCRIPT = File.read(File.expand_path("#{__FILE__}/../sidekiq_queue_job_stats.lua")).freeze QUEUE_JOB_STATS_SCRIPT = File.read(File.expand_path("#{__FILE__}/../sidekiq_queue_job_stats.lua")).freeze
QUEUE_JOB_STATS_SHA = Digest::SHA1.hexdigest(QUEUE_JOB_STATS_SCRIPT).freeze QUEUE_JOB_STATS_SHA = Digest::SHA1.hexdigest(QUEUE_JOB_STATS_SCRIPT).freeze
# The maximum depth (from the head) of each queue to probe. Probing the
# entirety of a very large queue will take longer and run the risk of
# timing out. But when we have a very large queue, we are most in need of
# reliable metrics. This trades off completeness for predictability by
# only taking a limited number of items from the head of the queue.
#
# The value is also interpolated into the help text registered for the
# sidekiq_enqueued_jobs metric.
PROBE_JOBS_LIMIT = 1_000
POOL_SIZE = 3 POOL_SIZE = 3
# This timeout is configured to higher interval than scrapping # This timeout is configured to higher interval than scrapping
...@@ -17,6 +24,9 @@ module GitLab ...@@ -17,6 +24,9 @@ module GitLab
# needed to be re-initialized # needed to be re-initialized
POOL_TIMEOUT = 90 POOL_TIMEOUT = 90
# Register the help text for sidekiq_enqueued_jobs. The text states the
# per-queue inspection limit so readers of the metric know the counts may
# be partial.
PrometheusMetrics.describe(
  "sidekiq_enqueued_jobs",
  "Total number of jobs enqueued by class name. " \
  "Only inspects the first #{PROBE_JOBS_LIMIT} jobs per queue."
)
def self.connection_pool def self.connection_pool
@@connection_pool ||= Hash.new do |h, connection_hash| # rubocop:disable Style/ClassVars @@connection_pool ||= Hash.new do |h, connection_hash| # rubocop:disable Style/ClassVars
config = connection_hash.merge(pool_timeout: POOL_TIMEOUT, size: POOL_SIZE) config = connection_hash.merge(pool_timeout: POOL_TIMEOUT, size: POOL_SIZE)
...@@ -62,6 +72,13 @@ module GitLab ...@@ -62,6 +72,13 @@ module GitLab
self self
end end
# Count worker classes present in Sidekiq queues. This uses a Lua
# script to find all jobs in all queues. That script will block
# all other Redis commands:
# https://redis.io/commands/eval#atomicity-of-scripts
#
# The script is generally fast, but may be slower with very large
# queues, which is why this is not enabled by default.
def probe_jobs def probe_jobs
with_sidekiq do with_sidekiq do
job_stats = {} job_stats = {}
...@@ -84,6 +101,38 @@ module GitLab ...@@ -84,6 +101,38 @@ module GitLab
self self
end end
# This does the same as #probe_jobs, but only looks at the first
# PROBE_JOBS_LIMIT jobs in each queue. This means that we run a
# single LRANGE command for each queue, which does not block other
# commands. For queues over PROBE_JOBS_LIMIT in size, this means
# that we will not have completely accurate statistics, but the
# probe performance will also not degrade as the queue gets
# larger.
#
# Jobs are tallied by their "class" field across all queues, and one
# sidekiq_enqueued_jobs measurement is added per class, labelled with
# name: <class>. Returns self so probes can be chained.
#
# DO NOT USE this and probe_jobs together, as they export the same
# metric (sidekiq_enqueued_jobs).
def probe_jobs_limit
  with_sidekiq do
    job_stats = Hash.new(0)

    Sidekiq::Queue.all.each do |queue|
      Sidekiq.redis do |conn|
        # LRANGE treats both offsets as inclusive, so the stop index has to
        # be PROBE_JOBS_LIMIT - 1 to read exactly PROBE_JOBS_LIMIT entries.
        conn.lrange("queue:#{queue.name}", 0, PROBE_JOBS_LIMIT - 1).each do |job|
          job_class = Sidekiq.load_json(job)["class"]
          job_stats[job_class] += 1
        end
      end
    end

    job_stats.each do |class_name, count|
      @metrics.add("sidekiq_enqueued_jobs", count, name: class_name)
    end
  end

  self
end
def probe_workers def probe_workers
with_sidekiq do with_sidekiq do
worker_stats = Hash.new(0) worker_stats = Hash.new(0)
......
...@@ -23,4 +23,21 @@ describe GitLab::Exporter::PrometheusMetrics do ...@@ -23,4 +23,21 @@ describe GitLab::Exporter::PrometheusMetrics do
subject.add("mymetric", "invalid", mylabel: "x", myotherlabel: "y").to_s subject.add("mymetric", "invalid", mylabel: "x", myotherlabel: "y").to_s
}.to raise_error(RuntimeError) }.to raise_error(RuntimeError)
end end
# Registers two descriptions but only records a measurement for one of
# them, then checks that the output carries a HELP line for the recorded
# metric only.
it "supports described metrics" do
  time = Time.now
  allow(Time).to receive(:now).and_return(time)

  begin
    described_class.describe("mymetric", "description")
    described_class.describe("missingmetric", "otherdescription")
    subject.add("mymetric", 1.3, mylabel: "x", myotherlabel: "y")

    expect(subject.to_s).to eq(<<~METRICS)
      # HELP mymetric description
      mymetric{mylabel="x",myotherlabel="y"} 1.3 #{(time.to_f * 1000).to_i}
    METRICS
  ensure
    # Descriptions live on the class, so reset them even when the
    # expectation fails; otherwise they leak into later examples.
    described_class.clear_descriptions
  end
end
end end
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment