require "sidekiq/api"
require "digest"

module GitLab
  module Exporter
    # A prober for Sidekiq queues
    #
    # It takes the Redis URL Sidekiq is connected to
    class SidekiqProber
      # Source of the Lua script evaluated by #probe_jobs, read once at load time
      # from sidekiq_queue_job_stats.lua, which sits next to this file.
      QUEUE_JOB_STATS_SCRIPT = File.read(File.expand_path("../sidekiq_queue_job_stats.lua", __FILE__)).freeze
      # SHA1 hex digest of the script source; this is the handle passed to
      # SCRIPT EXISTS and EVALSHA instead of re-sending the whole script.
      QUEUE_JOB_STATS_SHA    = Digest::SHA1.hexdigest(QUEUE_JOB_STATS_SCRIPT).freeze

      # Builds a prober and points the Sidekiq client at the configured Redis.
      #
      # opts    - options Hash; this class reads :redis_url and
      #           :redis_enable_client from it.
      # metrics - collector that receives every probed value through #add and is
      #           rendered with #to_s by #write_to.
      # logger  - stored in @logger; nothing in this class reads it (as far as
      #           this file shows).
      #
      # Side effects: reconfigures the global Sidekiq client and, when Redis is
      # reachable, makes sure the queue job stats Lua script is loaded.
      def initialize(opts, metrics: PrometheusMetrics.new, logger: nil)
        @opts    = opts
        @metrics = metrics
        @logger  = logger

        Sidekiq.configure_client do |config|
          config.redis = redis_options
        end

        ensure_queue_job_stats_script_loaded
      end

      # Publishes the global Sidekiq counters (processed, failed, enqueued,
      # scheduled, retry, dead, default queue latency, processes, workers).
      #
      # Does nothing when Redis is unreachable. Always returns self so probes
      # can be chained.
      def probe_stats
        return self unless connected?

        stats = Sidekiq::Stats.new

        # Metric name => Sidekiq::Stats reader, emitted in this exact order.
        {
          "sidekiq_jobs_processed_total"          => :processed,
          "sidekiq_jobs_failed_total"             => :failed,
          "sidekiq_jobs_enqueued_size"            => :enqueued,
          "sidekiq_jobs_scheduled_size"           => :scheduled_size,
          "sidekiq_jobs_retry_size"               => :retry_size,
          "sidekiq_jobs_dead_size"                => :dead_size,
          "sidekiq_default_queue_latency_seconds" => :default_queue_latency,
          "sidekiq_processes_size"                => :processes_size,
          "sidekiq_workers_size"                  => :workers_size
        }.each do |metric_name, reader|
          @metrics.add(metric_name, stats.public_send(reader))
        end

        self
      end

      # Publishes size, latency and paused state for every Sidekiq queue, each
      # labelled with the queue name.
      #
      # Does nothing when Redis is unreachable. Always returns self so probes
      # can be chained.
      def probe_queues
        return self unless connected?

        Sidekiq::Queue.all.each do |queue|
          @metrics.add("sidekiq_queue_size", queue.size, name: queue.name)
          @metrics.add("sidekiq_queue_latency_seconds", queue.latency, name: queue.name)
          # Exported as 1/0 rather than true/false.
          @metrics.add("sidekiq_queue_paused", queue.paused? ? 1 : 0, name: queue.name)
        end

        self
      end

      # Publishes the number of enqueued jobs per job class, labelled with the
      # class name, by evaluating the preloaded Lua script against every queue.
      #
      # The script result is converted with #to_h, so it is presumably a list of
      # [class_name, count] pairs (the script itself lives in another file).
      # Counts for a class that shows up in several queues are added together.
      #
      # Does nothing when Redis is unreachable, and publishes nothing at all if
      # any queue's script call raises Redis::CommandError. Always returns self.
      def probe_jobs
        return self unless connected?

        job_stats = {}

        Sidekiq::Queue.all.each do |queue|
          Sidekiq.redis do |conn|
            stats = conn.evalsha(QUEUE_JOB_STATS_SHA, ["queue:#{queue.name}"])
            # Sum on key collision: a plain merge! would keep only the count from
            # the last queue that contains the class.
            job_stats.merge!(stats.to_h) { |_class_name, seen, extra| seen + extra }
          end
        rescue Redis::CommandError # Could happen if the script exceeded the maximum run time (5 seconds by default)
          # FIXME: Should we call SCRIPT KILL?
          return self
        end

        job_stats.each do |class_name, count|
          @metrics.add("sidekiq_enqueued_jobs", count, name: class_name)
        end

        self
      end

      # Publishes the number of currently running jobs per job class, labelled
      # with the class name.
      #
      # Does nothing when Redis is unreachable. Always returns self so probes
      # can be chained.
      def probe_workers
        return self unless connected?

        worker_stats = Hash.new(0)

        # Only the work payload matters here; process and thread ids are ignored.
        Sidekiq::Workers.new.each do |_pid, _tid, work|
          job_klass = work["payload"]["class"]

          worker_stats[job_klass] += 1
        end

        worker_stats.each do |class_name, count|
          @metrics.add("sidekiq_running_jobs", count, name: class_name)
        end

        self
      end

      # Publishes the number of jobs waiting in the retry set per job class,
      # labelled with the class name.
      #
      # Does nothing when Redis is unreachable. Always returns self so probes
      # can be chained.
      def probe_retries
        return self unless connected?

        retry_stats = Hash.new(0)

        Sidekiq::RetrySet.new.each do |job|
          retry_stats[job.klass] += 1
        end

        retry_stats.each do |class_name, count|
          @metrics.add("sidekiq_to_be_retried_jobs", count, name: class_name)
        end

        self
      end

      # Deprecated: publishes the size of the dead set as "sidekiq_dead_jobs".
      # Use #probe_stats instead.
      #
      # The deprecation notice is printed on every call, even when Redis is
      # unreachable and nothing is published. Always returns self.
      def probe_dead
        puts "[DEPRECATED] probe_dead is now considered obsolete and will be removed in future major versions,"\
             " please use probe_stats instead"

        return self unless connected?

        @metrics.add("sidekiq_dead_jobs", Sidekiq::Stats.new.dead_size)

        self
      end

      # Renders the collected metrics with #to_s and writes the text to target.
      #
      # target - any object responding to #write (an IO, for instance).
      #
      # Returns whatever target.write returns.
      def write_to(target)
        rendered = @metrics.to_s
        target.write(rendered)
      end

      private

      # Connection settings handed to Sidekiq's client configuration.
      #
      # Fails fast: 1 second connect timeout and no reconnect attempts. When the
      # :redis_enable_client option is switched off, an explicit `id: nil` entry
      # is appended (presumably to stop the Redis client from naming its
      # connection; confirm against the Redis client in use).
      def redis_options
        base = {
          url: @opts[:redis_url],
          namespace: "resque:gitlab",
          connect_timeout: 1,
          reconnect_attempts: 0
        }

        return base if redis_enable_client?

        base.merge(id: nil)
      end

      # Value of the :redis_enable_client option, defaulting to true when the
      # option is absent or nil. An explicit false is returned unchanged.
      def redis_enable_client?
        flag = @opts[:redis_enable_client]
        flag.nil? ? true : flag
      end

      # Whether Redis can be reached through Sidekiq's connection.
      #
      # Issues a throwaway GET as a connectivity check; the fetched value is
      # ignored. A successful check is memoized in @connected. A failed one is
      # not, so the next call tries again.
      #
      # Returns true when the check succeeds, false on a connection error or
      # timeout.
      def connected?
        @connected ||= begin
          Sidekiq.redis { |conn| conn.get("foo") }
          true
        end
      rescue Redis::CannotConnectError, Redis::TimeoutError
        # Maybe we're trying to connect to a replica
        false
      end

      # Loads the queue job stats Lua script into Redis unless Redis reports it
      # as already present under its SHA. Does nothing when Redis is unreachable.
      def ensure_queue_job_stats_script_loaded
        return unless connected?

        Sidekiq.redis do |conn|
          # Using administrative commands on conn directly (which is a Redis::Namespace)
          # will be removed in redis-namespace 2.0.
          raw_redis = conn.redis

          raw_redis.script(:load, QUEUE_JOB_STATS_SCRIPT) unless raw_redis.script(:exists, QUEUE_JOB_STATS_SHA)
        end
      end
    end
  end
end