row_count.rb 9.21 KB
Newer Older
1 2
require "set"

3
module GitLab
4
  module Exporter
5 6 7
    module Database
      # A helper class that executes the query its given and returns an int of
      # the row count
8 9 10
      # This class works under the assumption you do COUNT(*) queries, define
      # queries in the QUERIES constant. If in doubt how these work, read
      # #construct_query
11
      # rubocop:disable Metrics/ClassLength
12
      class RowCountCollector < Base
13 14 15 16 17
        # We ignore mirrors with a next_execution_timestamp before
        # 2020-03-28 because this is when we stopped processing mirrors
        # for private projects on the free plan. Skipping those can
        # significantly improve query performance:
        # https://gitlab.com/gitlab-org/gitlab/-/issues/216252#note_334514544
Douwe Maan's avatar
Douwe Maan committed
18
        WHERE_MIRROR_ENABLED = <<~SQL.freeze
19
          projects.mirror = true
20
          AND projects.archived = false
21
          AND project_mirror_data.retry_count <= 14
22 23 24 25 26
          AND (
            (projects.visibility_level = 20 AND root_namespaces.visibility_level = 20)
            OR
            plans.name IN ('early_adopter', 'bronze', 'silver', 'gold')
          )
27
          AND project_mirror_data.next_execution_timestamp > '2020-03-28'
28 29 30 31 32 33
        SQL

        MIRROR_QUERY = {
          select: :projects,
          joins:  <<~SQL,
            INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
34 35 36 37 38 39 40 41 42
            INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
              WITH RECURSIVE "base_and_ancestors" AS (
                (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
                UNION
                (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
              ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
            )
            LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
            LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
Douwe Maan's avatar
Douwe Maan committed
43
          SQL
44
          check: "SELECT 1 FROM information_schema.tables WHERE table_name='plans'"
Douwe Maan's avatar
Douwe Maan committed
45
        }.freeze
46

47
        QUERIES = {
48 49
          mirrors_ready_to_sync: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
50
              #{WHERE_MIRROR_ENABLED}
51
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
52 53 54 55 56
              AND project_mirror_data.next_execution_timestamp <= NOW()
            SQL
          ),
          mirrors_not_updated_recently: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
57
              #{WHERE_MIRROR_ENABLED}
58 59 60
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
              AND (project_mirror_data.next_execution_timestamp - project_mirror_data.last_update_at) <= '30 minutes'::interval
              AND project_mirror_data.last_update_at < NOW() - '30 minutes'::interval
61 62 63 64 65
            SQL
          ),
          mirrors_updated_very_recently: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
              #{WHERE_MIRROR_ENABLED}
66 67
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
              AND project_mirror_data.last_update_at >= NOW() - '30 seconds'::interval
68 69 70 71
            SQL
          ),
          mirrors_behind_schedule: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
72
              #{WHERE_MIRROR_ENABLED}
73
              AND project_mirror_data.status NOT IN ('scheduled', 'started')
74
              AND project_mirror_data.next_execution_timestamp <= NOW() - '10 seconds'::interval
75 76 77 78
            SQL
          ),
          mirrors_scheduled_or_started: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
79
              #{WHERE_MIRROR_ENABLED}
80
              AND project_mirror_data.status IN ('scheduled', 'started')
81 82 83 84
            SQL
          ),
          mirrors_scheduled: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
85
              #{WHERE_MIRROR_ENABLED}
86
              AND project_mirror_data.status = 'scheduled'
87 88 89 90
            SQL
          ),
          mirrors_started: MIRROR_QUERY.merge( # EE only
            where: <<~SQL
91
              #{WHERE_MIRROR_ENABLED}
92
              AND project_mirror_data.status = 'started'
93 94
            SQL
          ),
95 96 97 98 99 100
          soft_deleted_projects: { select: :projects, where: "pending_delete=true" },
          orphaned_projects: {
            select: :projects,
            joins: "LEFT JOIN namespaces ON projects.namespace_id = namespaces.id",
            where: "namespaces.id IS NULL"
          },
Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
101 102 103 104 105 106 107 108 109 110 111 112
          uploads: { select: :uploads },
          users: {
            select: :users,
            joins: "LEFT JOIN
              (
                SELECT
                  members.user_id,
                  MAX(access_level) as access_level
                FROM members
                GROUP BY members.user_id
              ) AS u
              ON users.id = u.user_id",
pshutsin's avatar
pshutsin committed
113
            where: "user_type IS NULL",
Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
            fields: {
              admin:        {},
              external:     {},
              state:        {},
              access_level: { definition: "COALESCE(u.access_level, 0)" }
            }
          },
          projects: {
            select: :projects,
            fields: {
              visibility_level: {},
              archived:         {}
            }
          },
          groups: {
            select: :namespaces,
            fields: {
              visibility_level: {},
              root:             { definition: "(parent_id IS NULL)" }
            }
          }
135
        }.freeze
136

137 138 139 140 141 142
        def initialize(args)
          super(args)

          @selected_queries = Set.new(args[:selected_queries].map(&:to_sym)) unless args[:selected_queries].nil?
        end

143 144 145
        def run
          results = Hash.new(0)

146 147
          QUERIES.each do |key, query_hash|
            next if query_hash[:check] && !successful_check?(query_hash[:check])
148
            next if !@selected_queries.nil? && !@selected_queries.include?(key)
149 150

            results[key] = count_from_query_hash(query_hash)
151 152 153 154 155 156 157
          end

          results
        end

        private

158 159
        def count_from_query_hash(query_hash)
          result = execute(construct_query(query_hash))
Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
160
          return [{ "count": 0, "labels": {} }] unless result
161

Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
162 163 164 165 166
          result.map do |row|
            labels = {}
            (query_hash[:fields] || []).each do |key, _| labels[key] = row[key.to_s] end
            { "count": row["count"], "labels": labels }
          end
167 168 169 170 171 172
        end

        def successful_check?(query)
          result = execute("SELECT EXISTS (#{query})")
          return unless result

Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
173
          result[0]["exists"]
174 175
        end

176
        def execute(query)
177
          with_connection_pool do |conn|
178 179 180 181 182 183 184 185
            conn.exec(query).map_types!(type_map_for_results(conn))
          end
        rescue PG::UndefinedTable, PG::UndefinedColumn
          nil
        end

        def type_map_for_results(conn)
          @type_map_for_results ||= begin
Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
186 187 188 189 190 191 192 193 194 195 196 197
            tm = PG::BasicTypeMapForResults.new(conn)

            # Remove warning message:
            # Warning: no type cast defined for type "name" with oid 19.
            # Please cast this type explicitly to TEXT to be safe for future changes.
            # Warning: no type cast defined for type "regproc" with oid 24.
            # Please cast this type explicitly to TEXT to be safe for future changes.
            [{ "type": "text", "oid": 19 }, { "type": "int4", "oid": 24 }].each do |value|
              old_coder = tm.coders.find { |c| c.name == value[:type] }
              tm.add_coder(old_coder.dup.tap { |c| c.oid = value[:oid] })
            end

198
            tm
199
          end
200 201
        end

202 203
        # Not private so I can test it without meta programming tricks
        def construct_query(query)
Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
204 205 206 207 208 209 210 211 212 213
          query_string = "SELECT COUNT(*)"
          (query[:fields] || []).each do |key, value|
            query_string << ", "
            query_string << "(#{value[:definition]}) AS " if value[:definition]
            query_string << key.to_s
          end
          query_string << " FROM #{query[:select]}"
          query_string << " #{query[:joins]}"       if query[:joins]
          query_string << " WHERE #{query[:where]}" if query[:where]
          query_string << " GROUP BY " + query[:fields].keys.join(", ") if query[:fields]
214
          query_string << ";"
215 216
        end
      end
217
      # rubocop:enable Metrics/ClassLength
218 219 220

      # The prober which is called when gathering metrics
      class RowCountProber
Stan Hu's avatar
Stan Hu committed
221
        def initialize(opts, metrics: PrometheusMetrics.new, logger: nil) # rubocop:disable Lint/UnusedMethodArgument
222
          @metrics = metrics
223 224 225 226
          @collector = RowCountCollector.new(
            connection_string: opts[:connection_string],
            selected_queries: opts[:selected_queries]
          )
227 228 229 230
        end

        def probe_db
          results = @collector.run
Frédéric PLANCHON's avatar
Frédéric PLANCHON committed
231 232 233 234 235
          results.each do |query_name, result|
            labels = { query_name: query_name.to_s }
            result.each do |row|
              @metrics.add("gitlab_database_rows", row[:count].to_f, **labels, **row[:labels])
            end
236 237
          end

238 239
          self
        rescue PG::ConnectionBad
240 241 242 243 244 245 246 247 248 249
          self
        end

        def write_to(target)
          target.write(@metrics.to_s)
        end
      end
    end
  end
end