row_count.rb 5.56 KB
Newer Older
1 2
require "set"

3
module GitLab
4
  module Exporter
5 6 7
    module Database
      # A helper class that executes the query its given and returns an int of
      # the row count
8 9 10
      # This class works under the assumption you do COUNT(*) queries, define
      # queries in the QUERIES constant. If in doubt how these work, read
      # #construct_query
11
      class RowCountCollector < Base
Douwe Maan's avatar
Douwe Maan committed
12
        WHERE_MIRROR_ENABLED = <<~SQL.freeze
13 14
          projects.mirror = true
          AND project_mirror_data.retry_count <= 14
Douwe Maan's avatar
Douwe Maan committed
15
          AND (projects.visibility_level = 20 OR plans.name IN ('early_adopter', 'bronze', 'silver', 'gold'))
16 17 18 19 20 21 22 23
        SQL

        MIRROR_QUERY = {
          select: :projects,
          joins:  <<~SQL,
            INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
            INNER JOIN namespaces ON projects.namespace_id = namespaces.id
            LEFT JOIN plans ON namespaces.plan_id = plans.id
Douwe Maan's avatar
Douwe Maan committed
24
          SQL
25
          check: "SELECT 1 FROM information_schema.tables WHERE table_name='plans'"
Douwe Maan's avatar
Douwe Maan committed
26
        }.freeze
27

28
        QUERIES = {
29 30
          mirrors_ready_to_sync: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
31
              #{WHERE_MIRROR_ENABLED}
32
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
33 34 35 36 37
              AND project_mirror_data.next_execution_timestamp <= NOW()
            SQL
          ),
          mirrors_not_updated_recently: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
38
              #{WHERE_MIRROR_ENABLED}
39 40 41
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
              AND (project_mirror_data.next_execution_timestamp - project_mirror_data.last_update_at) <= '30 minutes'::interval
              AND project_mirror_data.last_update_at < NOW() - '30 minutes'::interval
42 43 44 45 46
            SQL
          ),
          mirrors_updated_very_recently: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              #{WHERE_MIRROR_ENABLED}
47 48
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
              AND project_mirror_data.last_update_at >= NOW() - '30 seconds'::interval
49 50 51 52
            SQL
          ),
          mirrors_behind_schedule: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
53
              #{WHERE_MIRROR_ENABLED}
54
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
55
              AND project_mirror_data.next_execution_timestamp <= NOW() - '10 seconds'::interval
56 57 58 59
            SQL
          ),
          mirrors_scheduled_or_started: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
60
              #{WHERE_MIRROR_ENABLED}
61
              AND project_mirror_data.status IN ('scheduled', 'started')
62 63 64 65
            SQL
          ),
          mirrors_scheduled: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
66
              #{WHERE_MIRROR_ENABLED}
67
              AND project_mirror_data.status = 'scheduled'
68 69 70 71
            SQL
          ),
          mirrors_started: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
72
              #{WHERE_MIRROR_ENABLED}
73
              AND project_mirror_data.status = 'started'
74 75
            SQL
          ),
76 77 78 79 80 81
          soft_deleted_projects: { select: :projects, where: "pending_delete=true" },
          orphaned_projects: {
            select: :projects,
            joins: "LEFT JOIN namespaces ON projects.namespace_id = namespaces.id",
            where: "namespaces.id IS NULL"
          },
Zeger-Jan van de Weg's avatar
Zeger-Jan van de Weg committed
82
          uploads: { select: :uploads }
83
        }.freeze
84

85 86 87 88 89 90
        def initialize(args)
          super(args)

          @selected_queries = Set.new(args[:selected_queries].map(&:to_sym)) unless args[:selected_queries].nil?
        end

91 92 93
        def run
          results = Hash.new(0)

94 95
          QUERIES.each do |key, query_hash|
            next if query_hash[:check] && !successful_check?(query_hash[:check])
96
            next if !@selected_queries.nil? && !@selected_queries.include?(key)
97 98

            results[key] = count_from_query_hash(query_hash)
99 100 101 102 103 104 105
          end

          results
        end

        private

106 107 108 109 110 111 112 113 114 115 116 117 118 119
        def count_from_query_hash(query_hash)
          result = execute(construct_query(query_hash))
          return 0 unless result

          result[0]["count"]
        end

        def successful_check?(query)
          result = execute("SELECT EXISTS (#{query})")
          return unless result

          result[0]["exists"] == "t"
        end

120
        def execute(query)
121
          with_connection_pool do |conn|
122
            conn.exec(query)
123
          end
124
        rescue PG::UndefinedTable, PG::UndefinedColumn
125
          nil
126 127
        end

128 129 130 131 132 133
        # Not private so I can test it without meta programming tricks
        def construct_query(query)
          query_string = "SELECT COUNT(*) FROM #{query[:select]} "
          query_string << "#{query[:joins]} "       if query[:joins]
          query_string << "WHERE #{query[:where]}"  if query[:where]
          query_string << ";"
134 135 136 137 138 139 140
        end
      end

      # The prober which is called when gathering metrics
      class RowCountProber
        def initialize(opts, metrics: PrometheusMetrics.new)
          @metrics = metrics
141 142 143 144
          @collector = RowCountCollector.new(
            connection_string: opts[:connection_string],
            selected_queries: opts[:selected_queries]
          )
145 146 147 148 149
        end

        def probe_db
          results = @collector.run
          results.each do |key, value|
150
            @metrics.add("gitlab_database_rows", value.to_f, query_name: key.to_s)
151 152
          end

153 154
          self
        rescue PG::ConnectionBad
155 156 157 158 159 160 161 162 163 164
          self
        end

        def write_to(target)
          target.write(@metrics.to_s)
        end
      end
    end
  end
end