Merge remote-tracking branch 'rschu1ze/master' into comp_expr_cache_settings

This commit is contained in:
Robert Schulze 2024-06-23 15:01:45 +00:00
commit 2015622997
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
7 changed files with 13 additions and 134 deletions

2
contrib/re2 vendored

@ -1 +1 @@
Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c
Subproject commit 85dd7ad833a73095ecf3e3baea608ba051bbe2c7

View File

@ -28,16 +28,20 @@ set(RE2_SOURCES
add_library(_re2 ${RE2_SOURCES})
target_include_directories(_re2 PUBLIC "${SRC_DIR}")
target_link_libraries(_re2 PRIVATE
absl::absl_check
absl::absl_log
absl::base
absl::core_headers
absl::fixed_array
absl::flags
absl::flat_hash_map
absl::flat_hash_set
absl::hash
absl::inlined_vector
absl::strings
absl::str_format
absl::synchronization
absl::optional
absl::span)
absl::span
absl::str_format
absl::strings
absl::synchronization)
add_library(ch_contrib::re2 ALIAS _re2)

View File

@ -48,7 +48,7 @@ public:
/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
/// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.
/// Then use getWithKey() to also return the found key including it's non-hashed data.
/// Then use getWithKey() to also return the found key including its non-hashed data.
virtual MappedPtr get(const Key & key) = 0;
virtual std::optional<KeyMapped> getWithKey(const Key &) = 0;

View File

@ -86,7 +86,7 @@ namespace DB
M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \
M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \
M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \
M(UInt64, page_cache_size, 10ul << 30, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \
M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \
M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \
M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \
M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \

View File

@ -30,5 +30,7 @@
</policies>
</storage_configuration>
<page_cache_size>10000000000</page_cache_size>
<query_log></query_log>
</clickhouse>

View File

@ -1,21 +0,0 @@
cold read 54975576145920
PageCacheBytesUnpinnedRoundedToHugePages 1
PageCacheBytesUnpinnedRoundedToPages 1
PageCacheChunkMisses 1
ReadBufferFromS3Bytes 1
repeat read 1 54975576145920
PageCacheBytesUnpinnedRoundedToHugePages 1
PageCacheBytesUnpinnedRoundedToPages 1
PageCacheChunkDataHits 1
dropped and bypassed cache 54975576145920
PageCacheChunkMisses 1
ReadBufferFromS3Bytes 1
repeat read 2 54975576145920
PageCacheBytesUnpinnedRoundedToHugePages 1
PageCacheBytesUnpinnedRoundedToPages 1
PageCacheChunkMisses 1
ReadBufferFromS3Bytes 1
repeat read 3 54975576145920
PageCacheBytesUnpinnedRoundedToHugePages 1
PageCacheBytesUnpinnedRoundedToPages 1
PageCacheChunkDataHits 1

View File

@ -1,106 +0,0 @@
-- Tags: no-fasttest, no-parallel
-- no-fasttest because we need an S3 storage policy
-- no-parallel because we look at server-wide counters about page cache usage
--
-- Purpose: read a MergeTree table stored with the 's3_cache' storage policy several times and, after
-- each read, print how selected page-cache / S3 counters from system.events changed since the
-- previous snapshot. The pattern used throughout is:
--   1. run a query,
--   2. `select ... from events_diff` to print the (coarsely rounded) counter deltas,
--   3. truncate + refill events_snapshot so the next step starts from a fresh baseline.
--
-- Session settings: turn the page cache on for disks without a file cache, and turn off eviction
-- injection, the filesystem cache and the uncompressed cache.
-- NOTE(review): presumably the other caches are disabled so that reads can only be served by S3 or
-- by the page cache - confirm.
set use_page_cache_for_disks_without_file_cache = 1;
set page_cache_inject_eviction = 0;
set enable_filesystem_cache = 0;
set use_uncompressed_cache = 0;
-- Baseline copy of all counters; refreshed (truncate + insert) after every measured step.
create table events_snapshot engine Memory as select * from system.events;
-- events_diff: per-event growth since the last snapshot, integer-divided by `granularity` and capped
-- at `clamp`. Only the events listed in the WHERE clause are reported, and only when the rounded
-- difference is non-zero.
create view events_diff as
-- round all stats to 70 MiB to leave a lot of leeway for overhead
-- (events whose name contains 'Bytes' are divided by 70 MiB, all other events by 35)
with if(event like '%Bytes%', 70*1024*1024, 35) as granularity,
-- cache hits counter can vary a lot depending on other settings:
-- e.g. if merge_tree_min_bytes_for_concurrent_read is small, multiple threads will read each chunk
-- so we just check that the value is not too low
-- (the three events below are capped at 1, every other event at 1000)
if(event in (
'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages',
'PageCacheChunkDataHits'), 1, 1000) as clamp
select event, min2(intDiv(new.value - old.value, granularity), clamp) as diff
from system.events new
left outer join events_snapshot old
on old.event = new.event
where diff != 0 and
event in (
'ReadBufferFromS3Bytes', 'PageCacheChunkMisses', 'PageCacheChunkDataMisses',
'PageCacheChunkDataHits', 'PageCacheChunkDataPartialHits',
'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages')
order by event;
drop table if exists page_cache_03055;
-- CODEC(NONE) keeps the column uncompressed, so 10485760 Int64 values occupy 10485760 * 8 bytes = 80 MiB.
create table page_cache_03055 (k Int64 CODEC(NONE)) engine MergeTree order by k settings storage_policy = 's3_cache';
-- Write an 80 MiB file (40 x 2 MiB chunks), and a few small files.
-- Merges are stopped for the duration of the insert and re-enabled afterwards.
system stop merges page_cache_03055;
insert into page_cache_03055 select * from numbers(10485760) settings max_block_size=100000000, preferred_block_size_bytes=1000000000;
-- Print the counter deltas caused by the insert itself.
select * from events_diff;
truncate table events_snapshot;
insert into events_snapshot select * from system.events;
-- Re-enable merges and force a final merge; the snapshot is refreshed afterwards so that the merge's
-- own counter changes are not attributed to the first measured read.
system start merges page_cache_03055;
optimize table page_cache_03055 final;
truncate table events_snapshot;
insert into events_snapshot select * from system.events;
-- Cold read, should miss cache. (Populating cache on write is not implemented yet.)
-- PageCacheChunkDataHits is excluded from the printed deltas for this step.
select 'cold read', sum(k) from page_cache_03055;
select * from events_diff where event not in ('PageCacheChunkDataHits');
truncate table events_snapshot;
insert into events_snapshot select * from system.events;
-- Repeat read, should hit cache.
select 'repeat read 1', sum(k) from page_cache_03055;
select * from events_diff;
truncate table events_snapshot;
insert into events_snapshot select * from system.events;
-- Drop cache and read again, should miss. Also don't write to cache.
system drop page cache;
select 'dropped and bypassed cache', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1;
-- Data could be read multiple times because we're not writing to cache.
-- (Not checking PageCacheBytesUnpinned* because it's unreliable in this case because of an intentional race condition, see PageCache::evictChunk.)
-- For PageCacheChunkMisses and ReadBufferFromS3Bytes only `diff >= 1` is printed instead of the exact value.
select event, if(event in ('PageCacheChunkMisses', 'ReadBufferFromS3Bytes'), diff >= 1, diff) from events_diff where event not in ('PageCacheChunkDataHits', 'PageCacheBytesUnpinnedRoundedToPages', 'PageCacheBytesUnpinnedRoundedToHugePages');
truncate table events_snapshot;
insert into events_snapshot select * from system.events;
-- Repeat read, should still miss, but populate cache.
select 'repeat read 2', sum(k) from page_cache_03055;
select * from events_diff where event not in ('PageCacheChunkDataHits');
truncate table events_snapshot;
insert into events_snapshot select * from system.events;
-- Read again, hit the cache.
-- The bypass setting is enabled again here; the data is expected to be cached already by 'repeat read 2'.
select 'repeat read 3', sum(k) from page_cache_03055 settings read_from_page_cache_if_exists_otherwise_bypass_cache = 1;
select * from events_diff;
truncate table events_snapshot;
insert into events_snapshot select * from system.events;
-- Known limitation: cache is not invalidated if a table is dropped and created again at the same path.
-- The scenario below is kept commented out as a reproducer for that limitation.
-- set allow_deprecated_database_ordinary=1;
-- create database test_03055 engine = Ordinary;
-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache';
-- insert into test_03055.t values (1);
-- select * from test_03055.t;
-- drop table test_03055.t;
-- create table test_03055.t (k Int64) engine MergeTree order by k settings storage_policy = 's3_cache';
-- insert into test_03055.t values (2);
-- select * from test_03055.t;
-- Cleanup of everything the test created.
drop table events_snapshot;
drop table page_cache_03055;
drop view events_diff;