Unverified Commit e1c4c266 authored by Andreas Brandl's avatar Andreas Brandl
Browse files

Monitor database bloat

The queries are largely based on work from
https://github.com/ioguix/pgsql-bloat-estimation.

Context:
https://gitlab.com/gitlab-com/gl-infra/infrastructure/issues/6178
parent 3611f742
......@@ -24,6 +24,9 @@ metrics.
* [CI builds](lib/gitlab_monitor/database/ci_builds.rb) --
`ci_pending_builds`, `ci_created_builds`, `ci_stale_builds`,
`ci_running_builds`
* [Bloat](lib/gitlab_monitor/database/bloat.rb) --
`gitlab_database_bloat_$type_$key` with type `btree` (index bloat) or `table`
(table bloat) and keys `bloat_ratio bloat_size extra_size real_size` (see below)
1. Git
* [git pull/push timings](lib/gitlab_monitor/git.rb) --
`git_pull_time_milliseconds`, `git_push_time_milliseconds`
......@@ -82,6 +85,21 @@ Once running, you can point your browser or curl to the following URLs:
* http://localhost:9168/sidekiq
* http://localhost:9168/metrics (to get all of the above combined)
### Database Bloat Metrics
Database bloat is measured for indexes (`btree`) and/or tables
(`table`). Returned metrics contain:
* `bloat_ratio`: estimated ratio of the real size used by bloat_size.
* `bloat_size`: estimated size of the bloat without the extra space kept for the fillfactor.
* `extra_size`: estimated extra size not used/needed by the index. This extra size is composed by the fillfactor, bloat and alignment padding spaces.
* `real_size`: real size of the index
Also see the [original documentation](https://github.com/ioguix/pgsql-bloat-estimation/blob/master/README.md).
Note that all metrics returned are estimates without an upper bound for
the error.
## Contributing
gitlab-monitor is an open source project and we are very happy to accept community contributions. Please refer to [CONTRIBUTING.md](/CONTRIBUTING.md) for details.
......
......@@ -32,6 +32,10 @@ probes:
opts:
quantiles: true
database_bloat:
class_name: Database::BloatProber
<<: *db_common
# We can group multiple probes under a single endpoint by setting the `multiple` key to `true`, followed
# by probe definitions as usual.
database:
......
......@@ -6,6 +6,7 @@ module GitLab
autoload :CiBuildsProber, "gitlab_monitor/database/ci_builds"
autoload :TuplesProber, "gitlab_monitor/database/tuple_stats"
autoload :RowCountProber, "gitlab_monitor/database/row_count"
autoload :BloatProber, "gitlab_monitor/database/bloat"
end
end
end
module GitLab
module Monitor
module Database
# Helper to collect bloat metrics.
class BloatCollector < Base
def run(type = :btree)
execute(self.class.query_for(type)).each_with_object({}) do |row, h|
h[row["object_name"]] = row
end
end
private
def execute(query)
with_connection_pool do |conn|
conn.exec(query)
end
end
class << self
def query_for(type)
@queries ||= {}
return @queries[type] if @queries[type]
file = File.join(__dir__, "bloat_#{type}.sql")
fail "Unknown bloat query file: #{file}" unless File.exist?(file)
@queries[type] = File.read(file)
end
end
end
# Prober class to gather bloat metrics
class BloatProber
METRIC_KEYS = %w(bloat_ratio bloat_size extra_size real_size).freeze
attr_reader :metrics, :collector, :bloat_types
def initialize(opts,
metrics: PrometheusMetrics.new,
collector: BloatCollector.new(connection_string: opts[:connection_string]))
@metrics = metrics
@collector = collector
@bloat_types = opts[:bloat_types] || %i(btree table)
end
def probe_db
bloat_types.each do |type|
probe_for_type(type)
end
end
def write_to(target)
target.write(metrics.to_s)
end
private
def probe_for_type(type)
collector.run(type).each do |query_name, data|
METRIC_KEYS.each do |key|
metrics.add("gitlab_database_bloat_#{type}_#{key}", data[key], query_name: query_name)
end
end
self
rescue PG::ConnectionBad
self
end
end
end
end
end
-- Originally from: https://github.com/ioguix/pgsql-bloat-estimation/blob/master/btree/btree_bloat.sql
-- WARNING: executed with a non-superuser role, the query inspect only index on tables you are granted to read.
-- WARNING: rows with is_na = 't' are known to have bad statistics ("name" type is not supported).
-- This query is compatible with PostgreSQL 8.2 and after
SELECT current_database(), nspname AS schemaname, tblname, idxname AS object_name, bs*(relpages)::bigint AS real_size,
bs*(relpages-est_pages)::bigint AS extra_size,
100 * (relpages-est_pages)::float / relpages AS extra_ratio,
fillfactor,
CASE WHEN relpages > est_pages_ff
THEN bs*(relpages-est_pages_ff)
ELSE 0
END AS bloat_size,
100 * (relpages-est_pages_ff)::float / relpages AS bloat_ratio,
is_na
-- , 100-(sub.pst).avg_leaf_density, est_pages, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, sub.reltuples, sub.relpages -- (DEBUG INFO)
FROM (
SELECT coalesce(1 +
ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth)
) AS est_pages,
coalesce(1 +
ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0
) AS est_pages_ff,
bs, nspname, table_oid, tblname, idxname, relpages, fillfactor, is_na
-- , stattuple.pgstatindex(quote_ident(nspname)||'.'||quote_ident(idxname)) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO)
FROM (
SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, relam, table_oid, fillfactor,
( index_tuple_hdr_bm +
maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN
WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign
ELSE index_tuple_hdr_bm%maxalign
END
+ nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN
WHEN nulldatawidth = 0 THEN 0
WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
ELSE nulldatawidth::integer%maxalign
END
)::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na
-- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO)
FROM (
SELECT
i.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, i.relam, a.attrelid AS table_oid,
current_setting('block_size')::numeric AS bs, fillfactor,
CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?)
WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8
ELSE 4
END AS maxalign,
/* per page header, fixed size: 20 for 7.X, 24 for others */
24 AS pagehdr,
/* per page btree opaque data */
16 AS pageopqdata,
/* per tuple header: add IndexAttributeBitMapData if some cols are null-able */
CASE WHEN max(coalesce(s.null_frac,0)) = 0
THEN 2 -- IndexTupleData size
ELSE 2 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8)
END AS index_tuple_hdr_bm,
/* data len: we remove null values save space using it fractionnal part from stats */
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS nulldatawidth,
max( CASE WHEN a.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na
FROM pg_attribute AS a
JOIN (
SELECT nspname, tbl.relname AS tblname, idx.relname AS idxname, idx.reltuples, idx.relpages, idx.relam,
indrelid, indexrelid, indkey::smallint[] AS attnum,
coalesce(substring(
array_to_string(idx.reloptions, ' ')
from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor
FROM pg_index
JOIN pg_class idx ON idx.oid=pg_index.indexrelid
JOIN pg_class tbl ON tbl.oid=pg_index.indrelid
JOIN pg_namespace ON pg_namespace.oid = idx.relnamespace
WHERE pg_index.indisvalid AND tbl.relkind = 'r' AND idx.relpages > 0
) AS i ON a.attrelid = i.indexrelid
JOIN pg_stats AS s ON s.schemaname = i.nspname
AND ((s.tablename = i.tblname AND s.attname = pg_catalog.pg_get_indexdef(a.attrelid, a.attnum, TRUE)) -- stats from tbl
OR (s.tablename = i.idxname AND s.attname = a.attname))-- stats from functionnal cols
JOIN pg_type AS t ON a.atttypid = t.oid
WHERE a.attnum > 0
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
) AS s1
) AS s2
JOIN pg_am am ON s2.relam = am.oid WHERE am.amname = 'btree'
) AS sub
WHERE NOT is_na
AND nspname = 'public'
ORDER BY 2,3,4;
-- Originally from: https://github.com/ioguix/pgsql-bloat-estimation/blob/master/table/table_bloat.sql
/* WARNING: executed with a non-superuser role, the query inspect only tables you are granted to read.
* This query is compatible with PostgreSQL 9.0 and more
*/
SELECT current_database(), schemaname, tblname AS object_name, bs*tblpages AS real_size,
(tblpages-est_tblpages)*bs AS extra_size,
CASE WHEN tblpages - est_tblpages > 0
THEN 100 * (tblpages - est_tblpages)/tblpages::float
ELSE 0
END AS extra_ratio, fillfactor,
CASE WHEN tblpages - est_tblpages_ff > 0
THEN (tblpages-est_tblpages_ff)*bs
ELSE 0
END AS bloat_size,
CASE WHEN tblpages - est_tblpages_ff > 0
THEN 100 * (tblpages - est_tblpages_ff)/tblpages::float
ELSE 0
END AS bloat_ratio, is_na
-- , (pst).free_percent + (pst).dead_tuple_percent AS real_frag
FROM (
SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages,
ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) AS est_tblpages_ff,
tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
-- , stattuple.pgstattuple(tblid) AS pst
FROM (
SELECT
( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
- CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END
- CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END
) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages,
toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
FROM (
SELECT
tbl.oid AS tblid, ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples,
tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages,
coalesce(toast.reltuples, 0) AS toasttuples,
coalesce(substring(
array_to_string(tbl.reloptions, ' ')
FROM 'fillfactor=([0-9]+)')::smallint, 100) AS fillfactor,
current_setting('block_size')::numeric AS bs,
CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma,
24 AS page_hdr,
23 + CASE WHEN MAX(coalesce(null_frac,0)) > 0 THEN ( 7 + count(*) ) / 8 ELSE 0::int END
+ CASE WHEN tbl.relhasoids THEN 4 ELSE 0 END AS tpl_hdr_size,
sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024) ) AS tpl_data_size,
bool_or(att.atttypid = 'pg_catalog.name'::regtype)
OR count(att.attname) <> count(s.attname) AS is_na
FROM pg_attribute AS att
JOIN pg_class AS tbl ON att.attrelid = tbl.oid
JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname
AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname
LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid
WHERE att.attnum > 0 AND NOT att.attisdropped
AND tbl.relkind = 'r'
GROUP BY 1,2,3,4,5,6,7,8,9,10, tbl.relhasoids
ORDER BY 2,3
) AS s
) AS s2
) AS s3
WHERE NOT is_na
-- AND tblpages*((pst).free_percent + (pst).dead_tuple_percent)::float4/100 >= 1
AND schemaname= 'public';
require "spec_helper"
require "gitlab_monitor/database/bloat"
describe GitLab::Monitor::Database::BloatCollector do
let(:connection_pool) { double("connection pool") }
let(:connection) { double("connection") }
subject { described_class.new(connection_string: "").run(type) }
let(:type) { :btree }
let(:query) { "select something" }
before do
allow_any_instance_of(described_class).to receive(:connection_pool).and_return(connection_pool)
allow(connection_pool).to receive(:with).and_yield(connection)
end
it "converts query results into a hash" do
row1 = { "object_name" => "o", "more_stuff" => 1 }
row2 = { "object_name" => "a", "more_stuff" => 2 }
expect(File).to receive(:read).and_return(query)
expect(connection).to receive(:exec).with(query).and_return([row1, row2])
expect(subject).to eq("o" => row1, "a" => row2)
end
end
describe GitLab::Monitor::Database::BloatProber do
let(:opts) { { bloat_types: %i(btree table) } }
let(:metrics) { double("PrometheusMetrics", add: nil) }
let(:collector) { double("BloatCollector", run: data) }
let(:data) do
{
"object" => {
"object_name" => "object",
"bloat_ratio" => 1,
"bloat_size" => 2,
"extra_size" => 3,
"real_size" => 4
}
}
end
describe"#probe_db" do
subject { described_class.new(opts, metrics: metrics, collector: collector).probe_db }
it "invokes the collector for each bloat type" do
expect(collector).to receive(:run).with(:btree)
expect(collector).to receive(:run).with(:table)
subject
end
it "adds bloat_ratio metric" do
opts[:bloat_types].each do |type|
expect(metrics).to receive(:add).with("gitlab_database_bloat_#{type}_bloat_ratio", 1, query_name: "object")
end
subject
end
it "adds bloat_size metric" do
opts[:bloat_types].each do |type|
expect(metrics).to receive(:add).with("gitlab_database_bloat_#{type}_bloat_size", 2, query_name: "object")
end
subject
end
it "adds extra_size metric" do
opts[:bloat_types].each do |type|
expect(metrics).to receive(:add).with("gitlab_database_bloat_#{type}_extra_size", 3, query_name: "object")
end
subject
end
it "adds real_size metric" do
opts[:bloat_types].each do |type|
expect(metrics).to receive(:add).with("gitlab_database_bloat_#{type}_real_size", 4, query_name: "object")
end
subject
end
end
context "#write_to" do
let(:target) { double }
let(:metrics) { double("PrometheusMetrics", to_s: double) }
subject { described_class.new(opts, metrics: metrics).write_to(target) }
it "writes to given target" do
expect(target).to receive(:write).with(metrics.to_s)
subject
end
end
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment