row_count.rb 4.74 KB
Newer Older
1 2 3 4 5
module GitLab
  module Monitor
    module Database
      # A helper class that executes the query its given and returns an int of
      # the row count
6 7 8
      # This class works under the assumption you do COUNT(*) queries, define
      # queries in the QUERIES constant. If in doubt how these work, read
      # #construct_query
9
      class RowCountCollector < Base
10 11 12 13 14 15 16 17 18 19 20 21
        WHERE_MIRROR_ENABLED = <<~SQL
          projects.mirror = true
          AND project_mirror_data.retry_count <= 14
          AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'));
        SQL

        MIRROR_QUERY = {
          select: :projects,
          joins:  <<~SQL,
            INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
            INNER JOIN namespaces ON projects.namespace_id = namespaces.id
            LEFT JOIN plans ON namespaces.plan_id = plans.id
Douwe Maan's avatar
Douwe Maan committed
22
          SQL
23 24 25
          check: "SELECT 1 FROM information_schema.tables WHERE table_name='project_mirror_data'"
        }

26
        QUERIES = {
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
          mirrors_ready_to_sync: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              projects.import_status NOT IN ('scheduled', 'started')
              AND project_mirror_data.next_execution_timestamp <= NOW()
              AND #{WHERE_MIRROR_ENABLED}
            SQL
          ),
          mirrors_not_updated_recently: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              projects.import_status NOT IN ('scheduled', 'started')
              AND projects.mirror_last_update_at < NOW() - '6 hours'::interval
              AND #{WHERE_MIRROR_ENABLED}
            SQL
          ),
          mirrors_behind_schedule: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              projects.import_status NOT IN ('scheduled', 'started')
              AND project_mirror_data.next_execution_timestamp <= NOW() - '10 minutes'::interval
              AND #{WHERE_MIRROR_ENABLED}
            SQL
          ),
          mirrors_scheduled_or_started: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              projects.import_status IN ('scheduled', 'started')
              AND #{WHERE_MIRROR_ENABLED}
            SQL
          ),
          mirrors_scheduled: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              projects.import_status = 'scheduled'
              AND #{WHERE_MIRROR_ENABLED}
            SQL
          ),
          mirrors_started: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              projects.import_status = 'started'
              AND #{WHERE_MIRROR_ENABLED}
            SQL
          ),
66 67 68 69 70 71
          soft_deleted_projects: { select: :projects, where: "pending_delete=true" },
          orphaned_projects: {
            select: :projects,
            joins: "LEFT JOIN namespaces ON projects.namespace_id = namespaces.id",
            where: "namespaces.id IS NULL"
          },
Zeger-Jan van de Weg's avatar
Zeger-Jan van de Weg committed
72
          uploads: { select: :uploads }
73
        }.freeze
74 75 76 77

        def run
          results = Hash.new(0)

78 79 80 81
          QUERIES.each do |key, query_hash|
            next if query_hash[:check] && !successful_check?(query_hash[:check])

            results[key] = count_from_query_hash(query_hash)
82 83 84 85 86 87 88
          end

          results
        end

        private

89 90 91 92 93 94 95 96 97 98 99 100 101 102
        def count_from_query_hash(query_hash)
          result = execute(construct_query(query_hash))
          return 0 unless result

          result[0]["count"]
        end

        def successful_check?(query)
          result = execute("SELECT EXISTS (#{query})")
          return unless result

          result[0]["exists"] == "t"
        end

103
        def execute(query)
104
          with_connection_pool do |conn|
105
            conn.exec(query)
106
          end
107
        rescue PG::UndefinedTable, PG::UndefinedColumn
108
          nil
109 110
        end

111 112 113 114 115 116
        # Not private so I can test it without meta programming tricks
        def construct_query(query)
          query_string = "SELECT COUNT(*) FROM #{query[:select]} "
          query_string << "#{query[:joins]} "       if query[:joins]
          query_string << "WHERE #{query[:where]}"  if query[:where]
          query_string << ";"
117 118 119 120 121 122 123 124 125 126 127 128 129
        end
      end

      # The prober which is called when gathering metrics
      class RowCountProber
        def initialize(opts, metrics: PrometheusMetrics.new)
          @metrics = metrics
          @collector = RowCountCollector.new(connection_string: opts[:connection_string])
        end

        def probe_db
          results = @collector.run
          results.each do |key, value|
130
            @metrics.add("gitlab_database_rows", value.to_i, query_name: key.to_s)
131 132
          end

133 134
          self
        rescue PG::ConnectionBad
135 136 137 138 139 140 141 142 143 144
          self
        end

        def write_to(target)
          target.write(@metrics.to_s)
        end
      end
    end
  end
end