Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-10 01:25:21 +00:00)

commit 298ed454ce
Merge remote-tracking branch 'upstream/master' into better-local-object-storage
@@ -176,6 +176,12 @@ if (OS_DARWIN)
     set (ENABLE_CURL_BUILD OFF)
 endif ()

+option(ENABLE_ISAL_LIBRARY "Enable ISA-L library ON by default except on aarch64." ON)
+if (ARCH_AARCH64)
+    # Disable ISA-L library on aarch64.
+    set (ENABLE_ISAL_LIBRARY OFF)
+endif ()
+
 if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
     # Can be lld or ld-lld or lld-13 or /path/to/lld.
     if (LINKER_NAME MATCHES "lld")
contrib/CMakeLists.txt
@@ -191,7 +191,9 @@ add_contrib (google-benchmark-cmake google-benchmark)

 add_contrib (ulid-c-cmake ulid-c)

-add_contrib (isa-l-cmake isa-l)
+if (ENABLE_ISAL_LIBRARY)
+    add_contrib (isa-l-cmake isa-l)
+endif()

 # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
@@ -1,6 +1,12 @@
 set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l")

+# Check for the NASM compiler.
+# The YASM and NASM assemblers are somewhat mutually compatible. ISAL specifically needs NASM. If only YASM is installed, then check_language(ASM_NASM)
+# below happily finds YASM, leading to weird errors at build time. Therefore, do an explicit check for NASM here.
+find_program(NASM_PATH NAMES nasm)
+if (NOT NASM_PATH)
+    message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
+endif ()
+
 include(CheckLanguage)
 check_language(ASM_NASM)
 if(NOT CMAKE_ASM_NASM_COMPILER)
@@ -172,8 +172,10 @@ if (TARGET OpenSSL::SSL)
     target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
 endif()

-target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
-add_definitions(-DHADOOP_ISAL_LIBRARY)
+if (ENABLE_ISAL_LIBRARY)
+    target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
+    add_definitions(-DHADOOP_ISAL_LIBRARY)
+endif()

 add_library(ch_contrib::hdfs ALIAS _hdfs3)
@@ -22,7 +22,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
 ### Install Prerequisites {#install-prerequisites}

 ``` bash
-sudo apt-get install git cmake ccache python3 ninja-build yasm gawk
+sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk
 ```

 ### Install and Use the Clang compiler
@@ -92,7 +92,7 @@ If all the components are installed, you may build in the same way as the steps
 Example for OpenSUSE Tumbleweed:

 ``` bash
-sudo zypper install git cmake ninja clang-c++ python lld yasm gawk
+sudo zypper install git cmake ninja clang-c++ python lld nasm yasm gawk
 git clone --recursive https://github.com/ClickHouse/ClickHouse.git
 mkdir build
 cmake -S . -B build
@@ -103,7 +103,7 @@ Example for Fedora Rawhide:

 ``` bash
 sudo yum update
-sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk
+sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk
 git clone --recursive https://github.com/ClickHouse/ClickHouse.git
 mkdir build
 cmake -S . -B build
@@ -0,0 +1,118 @@
---
slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest
sidebar_position: 300
sidebar_label: kolmogorovSmirnovTest
---

# kolmogorovSmirnovTest

Applies the Kolmogorov-Smirnov test to samples from two populations.

**Syntax**

``` sql
kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index)
```

Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population; otherwise it belongs to the sample from the second population.
Samples must belong to continuous, one-dimensional probability distributions.

**Arguments**

- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).

**Parameters**

- `alternative` — Alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). The corresponding test statistics are sketched after this list.
    Let F(x) and G(x) be the CDFs of the first and second distributions respectively.
    - `'two-sided'`
        The null hypothesis is that the samples come from the same distribution, i.e. F(x) = G(x) for all x.
        The alternative is that the distributions are not identical.
    - `'greater'`
        The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one,
        i.e. the CDF of the first distribution lies above, and hence to the left of, that of the second one.
        This means that F(x) >= G(x) for all x, and the alternative is that F(x) < G(x) for at least one x.
    - `'less'`
        The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one,
        i.e. the CDF of the first distribution lies below, and hence to the right of, that of the second one.
        This means that F(x) <= G(x) for all x, and the alternative is that F(x) > G(x) for at least one x.
- `computation_method` — The method used to compute the p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
    - `'exact'` — calculation is performed using the precise probability distribution of the test statistic. Compute-intensive and wasteful except for small samples.
    - `'asymp'` — calculation is performed using an approximation. For large sample sizes, the exact and asymptotic p-values are very similar.
    - `'auto'` — the `'exact'` method is used when the maximum sample size is less than 10'000.
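For reference, a hedged sketch of the statistics behind these alternatives (standard Kolmogorov-Smirnov definitions, stated here for convenience; they are not spelled out on this page): with empirical CDFs $F_n$ and $G_m$ built from the two samples, the two-sided statistic is

$$
D_{n,m} = \sup_x \lvert F_n(x) - G_m(x) \rvert,
$$

and the one-sided variants drop the absolute value in the direction of the alternative. The asymptotic p-value comes from the limiting Kolmogorov distribution of $D_{n,m}\sqrt{nm/(n+m)}$, which is why `'asymp'` and `'exact'` agree closely for large samples.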
**Returned values**

[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:

- calculated statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).

**Example**

Query:

``` sql
SELECT kolmogorovSmirnovTest('less', 'exact')(value, num)
FROM
(
    SELECT
        randNormal(0, 10) AS value,
        0 AS num
    FROM numbers(10000)
    UNION ALL
    SELECT
        randNormal(0, 10) AS value,
        1 AS num
    FROM numbers(10000)
)
```

Result:

``` text
┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐
│ (0.009899999999999996,0.37528595205132287)         │
└────────────────────────────────────────────────────┘
```

Note:
The p-value is greater than 0.05 (at a 95% confidence level), so the null hypothesis is not rejected. This is expected, since both samples are drawn from the same normal distribution.

Query:

``` sql
SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num)
FROM
(
    SELECT
        randStudentT(10) AS value,
        0 AS num
    FROM numbers(100)
    UNION ALL
    SELECT
        randNormal(0, 10) AS value,
        1 AS num
    FROM numbers(100)
)
```

Result:

``` text
┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐
│ (0.4100000000000002,6.61735760482795e-8)                │
└─────────────────────────────────────────────────────────┘
```

Note:
The p-value is less than 0.05 (at a 95% confidence level), so the null hypothesis is rejected. This is expected, since one sample is Student-t distributed and the other is normal.

**See Also**

- [Kolmogorov-Smirnov test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test)
@@ -26,6 +26,7 @@
 #include <Common/TLDListsHolder.h>
 #include <Common/quoteString.h>
 #include <Common/randomSeed.h>
+#include <Common/ThreadPool.h>
 #include <Loggers/Loggers.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/ReadBufferFromString.h>
src/Common/Documentation.cpp (new file)
@@ -0,0 +1,30 @@
#include <Common/Documentation.h>

namespace DB
{

std::string Documentation::examplesAsString() const
{
    std::string res;
    for (const auto & [example_name, example_query] : examples)
    {
        res += example_name + ":\n\n";
        res += "```sql\n";
        res += example_query + "\n";
        res += "```\n";
    }
    return res;
}

std::string Documentation::categoriesAsString() const
{
    if (categories.empty())
        return "";

    std::string res = categories[0];
    for (size_t i = 1; i < categories.size(); ++i)
        res += ", " + categories[i];
    return res;
}

}
@@ -42,27 +42,44 @@ namespace DB
  *
  * Documentation does not support multiple languages.
  * The only available language is English.
  *
  * TODO: Allow to specify Syntax, Argument(s) and a Returned Value.
  * TODO: Organize Examples as a struct of ExampleName, ExampleQuery and ExampleResult.
  */
 struct Documentation
 {
     using Description = std::string;

     using Syntax = std::string;

     using Argument = std::string;
     using Arguments = std::vector<Argument>;

     using ReturnedValue = std::string;

     using ExampleName = std::string;
     using ExampleQuery = std::string;
     using Examples = std::map<ExampleName, ExampleQuery>;

     using Category = std::string;
     using Categories = std::vector<Category>;

     using Related = std::string;

     Description description;
     Examples examples;
     Categories categories;

-    Documentation(Description description_) : description(std::move(description_)) {}
+    Documentation(Description description_) : description(std::move(description_)) {} /// NOLINT
     Documentation(Description description_, Examples examples_) : description(std::move(description_)), examples(std::move(examples_)) {}
     Documentation(Description description_, Examples examples_, Categories categories_)
         : description(std::move(description_)), examples(std::move(examples_)), categories(std::move(categories_)) {}

     /// TODO: Please remove this constructor. Documentation should always be non-empty.
-    Documentation() {}
+    Documentation() = default;

     std::string examplesAsString() const;
     std::string categoriesAsString() const;
 };

 }
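Taken together with Documentation.cpp above, a hedged usage sketch of the new struct (the description, example, and category strings below are hypothetical; the constructors and rendering behavior come from the diff, and it compiles only inside the ClickHouse tree where the header exists):

```cpp
#include <Common/Documentation.h>
#include <iostream>

int main()
{
    // Hypothetical content; real call sites would pass this to a factory's
    // registration function rather than printing it.
    DB::Documentation doc(
        "Returns the login of the current user.",   // Description
        {{"Basic usage", "SELECT currentUser()"}},  // Examples (name -> query)
        {"Introspection"});                         // Categories

    // examplesAsString() renders each example as a ```sql fenced block;
    // categoriesAsString() joins the categories with ", ".
    std::cout << doc.examplesAsString() << doc.categoriesAsString() << '\n';
}
```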
@@ -1,6 +1,7 @@
 #include "CachedOnDiskReadBufferFromFile.h"

 #include <Disks/IO/createReadBufferFromFileBase.h>
+#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
 #include <IO/ReadBufferFromFile.h>
 #include <base/scope_guard.h>
 #include <Common/assert_cast.h>
@@ -115,27 +116,25 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)

     if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
     {
-        file_segments_holder.emplace(cache->get(cache_key, offset, size));
+        file_segments = cache->get(cache_key, offset, size);
     }
     else
     {
         CreateFileSegmentSettings create_settings(is_persistent ? FileSegmentKind::Persistent : FileSegmentKind::Regular);
-        file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size, create_settings));
+        file_segments = cache->getOrSet(cache_key, offset, size, create_settings);
     }

     /**
      * Segments in returned list are ordered in ascending order and represent a full contiguous
      * interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY.
      */
-    if (file_segments_holder->file_segments.empty())
+    if (file_segments->empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "List of file segments cannot be empty");

     LOG_TEST(
         log,
         "Having {} file segments to read: {}, current offset: {}",
-        file_segments_holder->file_segments.size(), file_segments_holder->toString(), file_offset_of_buffer_end);
-
-    current_file_segment_it = file_segments_holder->file_segments.begin();
+        file_segments->size(), file_segments->toString(), file_offset_of_buffer_end);

     initialized = true;
 }
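The comment in initialize() pins down the central invariant of the segment list. A minimal, self-contained sketch of the contiguity check it implies (simplified types for illustration; not ClickHouse code):

```cpp
#include <cstddef>
#include <vector>

// A file segment covers an inclusive byte range [left, right].
struct Range { std::size_t left; std::size_t right; };

// The invariant from the comment above: segments are ascending and form one
// contiguous interval, i.e. each segment starts right after its predecessor.
bool isContiguousAscending(const std::vector<Range> & segments)
{
    for (std::size_t i = 1; i < segments.size(); ++i)
        if (segments[i].left != segments[i - 1].right + 1)
            return false;
    return true;
}
```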
@@ -165,7 +164,7 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm
 }

 CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
-CachedOnDiskReadBufferFromFile::getRemoteFSReadBuffer(FileSegment & file_segment, ReadType read_type_)
+CachedOnDiskReadBufferFromFile::getRemoteReadBuffer(FileSegment & file_segment, ReadType read_type_)
 {
     switch (read_type_)
     {
@@ -201,7 +200,7 @@ CachedOnDiskReadBufferFromFile::getRemoteFSReadBuffer(FileSegment & file_segment
         }
         else
         {
-            chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset());
+            chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
         }

         return remote_fs_segment_reader;
@@ -238,27 +237,27 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co
    /// requested_range:    [__________]
    ///                     ^
    ///                     current_offset
-   size_t first_non_downloaded_offset = file_segment.getFirstNonDownloadedOffset();
+   size_t first_non_downloaded_offset = file_segment.getFirstNonDownloadedOffset(true);
    return first_non_downloaded_offset > current_offset;
 }

 CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
-CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & file_segment)
+CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_segment)
 {
-    auto download_state = file_segment->state();
+    auto download_state = file_segment.state();

     if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
     {
         if (download_state == FileSegment::State::DOWNLOADED)
         {
             read_type = ReadType::CACHED;
-            return getCacheReadBuffer(*file_segment);
+            return getCacheReadBuffer(file_segment);
         }
         else
         {
             LOG_TEST(log, "Bypassing cache because `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` option is used");
             read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
-            return getRemoteFSReadBuffer(*file_segment, read_type);
+            return getRemoteReadBuffer(file_segment, read_type);
         }
     }

@@ -266,15 +265,15 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
     {
         switch (download_state)
         {
-            case FileSegment::State::SKIP_CACHE:
+            case FileSegment::State::DETACHED:
             {
-                LOG_TRACE(log, "Bypassing cache because file segment state is `SKIP_CACHE`");
+                LOG_TRACE(log, "Bypassing cache because file segment state is `DETACHED`");
                 read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
-                return getRemoteFSReadBuffer(*file_segment, read_type);
+                return getRemoteReadBuffer(file_segment, read_type);
             }
             case FileSegment::State::DOWNLOADING:
             {
-                if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
+                if (canStartFromCache(file_offset_of_buffer_end, file_segment))
                 {
                     /// segment{k} state: DOWNLOADING
                     /// cache:           [______|___________
@@ -285,21 +284,21 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
                     ///                  file_offset_of_buffer_end

                     read_type = ReadType::CACHED;
-                    return getCacheReadBuffer(*file_segment);
+                    return getCacheReadBuffer(file_segment);
                 }

-                download_state = file_segment->wait();
+                download_state = file_segment.wait(file_offset_of_buffer_end);
                 continue;
             }
             case FileSegment::State::DOWNLOADED:
             {
                 read_type = ReadType::CACHED;
-                return getCacheReadBuffer(*file_segment);
+                return getCacheReadBuffer(file_segment);
             }
             case FileSegment::State::EMPTY:
             case FileSegment::State::PARTIALLY_DOWNLOADED:
             {
-                if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
+                if (canStartFromCache(file_offset_of_buffer_end, file_segment))
                 {
                     /// segment{k} state: PARTIALLY_DOWNLOADED
                     /// cache:           [______|___________
@@ -310,13 +309,13 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
                     ///                  file_offset_of_buffer_end

                     read_type = ReadType::CACHED;
-                    return getCacheReadBuffer(*file_segment);
+                    return getCacheReadBuffer(file_segment);
                 }

-                auto downloader_id = file_segment->getOrSetDownloader();
-                if (downloader_id == file_segment->getCallerId())
+                auto downloader_id = file_segment.getOrSetDownloader();
+                if (downloader_id == file_segment.getCallerId())
                 {
-                    if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
+                    if (canStartFromCache(file_offset_of_buffer_end, file_segment))
                     {
                         /// segment{k}
                         /// cache:           [______|___________
@@ -327,11 +326,12 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
                         ///                  file_offset_of_buffer_end

                         read_type = ReadType::CACHED;
-                        file_segment->resetDownloader();
-                        return getCacheReadBuffer(*file_segment);
+                        file_segment.resetDownloader();
+                        return getCacheReadBuffer(file_segment);
                     }

-                    if (file_segment->getCurrentWriteOffset() < file_offset_of_buffer_end)
+                    auto current_write_offset = file_segment.getCurrentWriteOffset(false);
+                    if (current_write_offset < file_offset_of_buffer_end)
                     {
                         /// segment{1}
                         /// cache:         [_____|___________
@@ -341,25 +341,25 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
                         ///                      ^
                         ///                      file_offset_of_buffer_end

-                        LOG_TEST(log, "Predownload. File segment info: {}", file_segment->getInfoForLog());
-                        chassert(file_offset_of_buffer_end > file_segment->getCurrentWriteOffset());
-                        bytes_to_predownload = file_offset_of_buffer_end - file_segment->getCurrentWriteOffset();
-                        chassert(bytes_to_predownload < file_segment->range().size());
+                        LOG_TEST(log, "Predownload. File segment info: {}", file_segment.getInfoForLog());
+                        chassert(file_offset_of_buffer_end > current_write_offset);
+                        bytes_to_predownload = file_offset_of_buffer_end - current_write_offset;
+                        chassert(bytes_to_predownload < file_segment.range().size());
                     }

                     read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
-                    return getRemoteFSReadBuffer(*file_segment, read_type);
+                    return getRemoteReadBuffer(file_segment, read_type);
                 }

-                download_state = file_segment->state();
+                download_state = file_segment.state();
                 continue;
             }
             case FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION:
             {
-                if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
+                if (canStartFromCache(file_offset_of_buffer_end, file_segment))
                 {
                     read_type = ReadType::CACHED;
-                    return getCacheReadBuffer(*file_segment);
+                    return getCacheReadBuffer(file_segment);
                 }
                 else
                 {
@@ -367,7 +367,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
                         log,
                         "Bypassing cache because file segment state is `PARTIALLY_DOWNLOADED_NO_CONTINUATION` and downloaded part already used");
                     read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
-                    return getRemoteFSReadBuffer(*file_segment, read_type);
+                    return getRemoteReadBuffer(file_segment, read_type);
                 }
             }
         }
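The switch above is the heart of the cache read path. A condensed, hypothetical restatement of the decision it makes (not ClickHouse code; the real implementation additionally loops, waits on concurrent downloaders, and tracks a predownload amount):

```cpp
// Simplified sketch of the read-type decision in getReadBufferForFileSegment.
enum class State { Downloaded, Downloading, Empty, PartiallyDownloaded,
                   PartiallyDownloadedNoContinuation, Detached };
enum class ReadType { Cached, RemoteAndPutInCache, RemoteBypassCache };

ReadType chooseReadType(State state, bool can_start_from_cache, bool became_downloader)
{
    if (can_start_from_cache && state != State::Detached)
        return ReadType::Cached;  // the requested offset is already materialized on disk

    switch (state)
    {
        case State::Downloaded:
            return ReadType::Cached;
        case State::Detached:
            return ReadType::RemoteBypassCache;
        case State::Downloading:
            // The real code waits for the current downloader and re-enters its loop;
            // approximated here as a bypass.
            return ReadType::RemoteBypassCache;
        case State::Empty:
        case State::PartiallyDownloaded:
            // Whoever wins getOrSetDownloader() downloads and fills the cache;
            // everyone else re-checks the state (approximated as a bypass here).
            return became_downloader ? ReadType::RemoteAndPutInCache
                                     : ReadType::RemoteBypassCache;
        case State::PartiallyDownloadedNoContinuation:
            return ReadType::RemoteBypassCache;
    }
    return ReadType::RemoteBypassCache;
}
```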
@@ -375,12 +375,12 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
 }

 CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
-CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_segment)
+CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segment)
 {
-    chassert(!file_segment->isDownloader());
-    chassert(file_offset_of_buffer_end >= file_segment->range().left);
+    chassert(!file_segment.isDownloader());
+    chassert(file_offset_of_buffer_end >= file_segment.range().left);

-    auto range = file_segment->range();
+    auto range = file_segment.range();
     bytes_to_predownload = 0;

     Stopwatch watch(CLOCK_MONOTONIC);
@@ -392,17 +392,18 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
         ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds());

     [[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
-    chassert(download_current_segment == file_segment->isDownloader());
+    chassert(download_current_segment == file_segment.isDownloader());

-    chassert(file_segment->range() == range);
+    chassert(file_segment.range() == range);
     chassert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right);

     LOG_TEST(
         log,
-        "Current file segment: {}, read type: {}, current file offset: {}",
-        range.toString(),
+        "Current read type: {}, read offset: {}, impl read range: {}, file segment: {}",
         toString(read_type),
-        file_offset_of_buffer_end);
+        file_offset_of_buffer_end,
+        read_buffer_for_file_segment->getFileOffsetOfBufferEnd(),
+        file_segment.getInfoForLog());

     read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right]

@@ -444,11 +445,11 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
         }
         case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
         {
-            chassert(file_segment->isDownloader());
+            chassert(file_segment.isDownloader());

             if (bytes_to_predownload)
             {
-                size_t current_write_offset = file_segment->getCurrentWriteOffset();
+                const size_t current_write_offset = file_segment.getCurrentWriteOffset(false);
                 read_buffer_for_file_segment->seek(current_write_offset, SEEK_SET);
             }
             else
@@ -458,7 +459,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
                 assert(read_buffer_for_file_segment->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end);
             }

-            auto current_write_offset = file_segment->getCurrentWriteOffset();
+            const auto current_write_offset = file_segment.getCurrentWriteOffset(false);
             if (current_write_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
             {
                 throw Exception(
@@ -469,7 +470,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
                     current_write_offset,
                     read_buffer_for_file_segment->getPosition(),
                     read_buffer_for_file_segment->getFileOffsetOfBufferEnd(),
-                    file_segment->getInfoForLog());
+                    file_segment.getInfoForLog());
             }

             break;
@@ -483,52 +484,46 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se

 bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext()
 {
-    LOG_TEST(log, "Completed segment: {}", (*current_file_segment_it)->range().toString());
+    auto * current_file_segment = &file_segments->front();
+    auto completed_range = current_file_segment->range();

     if (enable_logging)
-        appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type);
+        appendFilesystemCacheLog(completed_range, read_type);

-    auto file_segment_it = current_file_segment_it++;
-    auto & file_segment = *file_segment_it;
-
-    [[maybe_unused]] const auto & range = file_segment->range();
-    chassert(file_offset_of_buffer_end > range.right);
-
-    LOG_TEST(
-        log,
-        "Removing file segment: {}, downloader: {}, state: {}",
-        file_segment->range().toString(),
-        file_segment->getDownloader(),
-        file_segment->state());
-
-    /// Do not hold pointer to file segment if it is not needed anymore
-    /// so it can become releasable and can be evicted from cache.
-    file_segment->completeWithoutState();
-    file_segments_holder->file_segments.erase(file_segment_it);
-
-    if (current_file_segment_it == file_segments_holder->file_segments.end())
-        return false;
-
-    implementation_buffer = getImplementationBuffer(*current_file_segment_it);
+    chassert(file_offset_of_buffer_end > completed_range.right);

     if (read_type == ReadType::CACHED)
-        (*current_file_segment_it)->incrementHitsCount();
+    {
+        chassert(current_file_segment->getDownloadedSize(true) == current_file_segment->range().size());
+    }
+
+    file_segments->popFront();
+    if (file_segments->empty())
+        return false;
+
+    current_file_segment = &file_segments->front();
+    current_file_segment->use();
+    implementation_buffer = getImplementationBuffer(*current_file_segment);
+
+    if (read_type == ReadType::CACHED)
+        current_file_segment->incrementHitsCount();
+
+    LOG_TEST(
+        log, "New segment range: {}, old range: {}",
+        current_file_segment->range().toString(), completed_range.toString());

-    LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString());
     return true;
 }

 CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile()
 {
-    if (enable_logging
-        && file_segments_holder
-        && current_file_segment_it != file_segments_holder->file_segments.end())
+    if (enable_logging && file_segments && !file_segments->empty())
     {
-        appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type);
+        appendFilesystemCacheLog(file_segments->front().range(), read_type);
     }
 }

-void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
+void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
 {
     Stopwatch predownload_watch(CLOCK_MONOTONIC);
     SCOPE_EXIT({
@@ -547,9 +542,10 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
         /// download from offset a'' < a', but return buffer from offset a'.
         LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId());

-        chassert(static_cast<size_t>(implementation_buffer->getPosition()) == file_segment->getCurrentWriteOffset());
-        size_t current_offset = file_segment->getCurrentWriteOffset();
-        const auto & current_range = file_segment->range();
+        /// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
+        chassert(static_cast<size_t>(implementation_buffer->getPosition()) == file_segment.getCurrentWriteOffset(false));
+        size_t current_offset = file_segment.getCurrentWriteOffset(false);
+        const auto & current_range = file_segment.range();

         while (true)
         {
@@ -574,7 +570,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
                     "current download offset: {}, expected: {}, eof: {}",
                     bytes_to_predownload,
                     current_range.toString(),
-                    file_segment->getCurrentWriteOffset(),
+                    file_segment.getCurrentWriteOffset(false),
                     file_offset_of_buffer_end,
                     implementation_buffer->eof());

@@ -584,7 +580,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
                 {
                     nextimpl_working_buffer_offset = implementation_buffer->offset();

-                    auto current_write_offset = file_segment->getCurrentWriteOffset();
+                    auto current_write_offset = file_segment.getCurrentWriteOffset(false);
                     if (current_write_offset != static_cast<size_t>(implementation_buffer->getPosition())
                         || current_write_offset != file_offset_of_buffer_end)
                     {
@@ -596,7 +592,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
                             current_write_offset,
                             file_offset_of_buffer_end,
                             implementation_buffer->getPosition(),
-                            file_segment->getInfoForLog());
+                            file_segment.getInfoForLog());
                     }
                 }

@@ -608,15 +604,15 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)

             ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);

-            bool continue_predownload = file_segment->reserve(current_predownload_size);
+            bool continue_predownload = file_segment.reserve(current_predownload_size);
             if (continue_predownload)
             {
                 LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);

-                chassert(file_segment->getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
+                chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));

-                bool success = writeCache(implementation_buffer->buffer().begin(), current_predownload_size, current_offset, *file_segment);
-                if (success)
+                continue_predownload = writeCache(implementation_buffer->buffer().begin(), current_predownload_size, current_offset, file_segment);
+                if (continue_predownload)
                 {
                     current_offset += current_predownload_size;

@@ -626,13 +622,8 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
                 else
                 {
                     LOG_TEST(log, "Bypassing cache because writeCache (in predownload) method failed");
-                    continue_predownload = false;
                 }
             }
-            else
-            {
-                file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
-            }

             if (!continue_predownload)
             {
@@ -652,21 +643,21 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
                 /// TODO: allow seek more than once with seek avoiding.

                 bytes_to_predownload = 0;
+                file_segment.completePartAndResetDownloader();
+                chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);

-                chassert(file_segment->state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION
-                    || file_segment->state() == FileSegment::State::SKIP_CACHE);
-                LOG_TEST(log, "Bypassing cache because for {}", file_segment->getInfoForLog());
+                LOG_TEST(log, "Bypassing cache because for {}", file_segment.getInfoForLog());

                 read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;

                 swap(*implementation_buffer);
                 resetWorkingBuffer();

-                implementation_buffer = getRemoteFSReadBuffer(*file_segment, read_type);
+                implementation_buffer = getRemoteReadBuffer(file_segment, read_type);

                 swap(*implementation_buffer);

-                implementation_buffer->setReadUntilPosition(file_segment->range().right + 1); /// [..., range.right]
+                implementation_buffer->setReadUntilPosition(file_segment.range().right + 1); /// [..., range.right]
                 implementation_buffer->seek(file_offset_of_buffer_end, SEEK_SET);

                 LOG_TRACE(
@@ -683,12 +674,12 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)

 bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
 {
-    auto & file_segment = *current_file_segment_it;
-    auto current_read_range = file_segment->range();
-    auto current_state = file_segment->state();
+    auto & file_segment = file_segments->front();
+    const auto & current_read_range = file_segment.range();
+    auto current_state = file_segment.state();

     chassert(current_read_range.left <= file_offset_of_buffer_end);
-    chassert(!file_segment->isDownloader());
+    chassert(!file_segment.isDownloader());

     if (file_offset_of_buffer_end > current_read_range.right)
     {
@@ -707,7 +698,7 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
        ///                   ^
        ///                   file_offset_of_buffer_end

-        auto current_write_offset = file_segment->getCurrentWriteOffset();
+        auto current_write_offset = file_segment.getCurrentWriteOffset(true);
         bool cached_part_is_finished = current_write_offset == file_offset_of_buffer_end;

         LOG_TEST(log, "Current write offset: {}, file offset of buffer end: {}", current_write_offset, file_offset_of_buffer_end);
@@ -715,7 +706,7 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
         if (cached_part_is_finished)
         {
             /// TODO: makes sense to reuse local file reader if we return here with CACHED read type again?
-            implementation_buffer = getImplementationBuffer(*current_file_segment_it);
+            implementation_buffer = getImplementationBuffer(file_segment);

             return true;
         }
@@ -743,7 +734,7 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
         * to read by marks range given to him. Therefore, each nextImpl() call, in case of
         * READ_AND_PUT_IN_CACHE, starts with getOrSetDownloader().
         */
-        implementation_buffer = getImplementationBuffer(*current_file_segment_it);
+        implementation_buffer = getImplementationBuffer(file_segment);
     }

     return true;
@@ -794,15 +785,13 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
 {
     last_caller_id = FileSegment::getCallerId();

     assertCorrectness();

     if (file_offset_of_buffer_end == read_until_position)
         return false;

     if (!initialized)
         initialize(file_offset_of_buffer_end, getTotalSizeToRead());

-    if (current_file_segment_it == file_segments_holder->file_segments.end())
+    if (file_segments->empty())
         return false;

     bool implementation_buffer_can_be_reused = false;
@@ -812,25 +801,25 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
             /// Save state of current file segment before it is completed.
             nextimpl_step_log_info = getInfoForLog();

-            if (current_file_segment_it == file_segments_holder->file_segments.end())
+            if (file_segments->empty())
                 return;

-            auto & file_segment = *current_file_segment_it;
+            auto & file_segment = file_segments->front();

             bool download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
             if (download_current_segment)
             {
-                bool need_complete_file_segment = file_segment->isDownloader();
+                bool need_complete_file_segment = file_segment.isDownloader();
                 if (need_complete_file_segment)
                 {
                     if (!implementation_buffer_can_be_reused)
-                        file_segment->resetRemoteFileReader();
+                        file_segment.resetRemoteFileReader();

-                    file_segment->completePartAndResetDownloader();
+                    file_segment.completePartAndResetDownloader();
                 }
             }

-            chassert(!file_segment->isDownloader());
+            chassert(!file_segment.isDownloader());
         }
         catch (...)
         {
@@ -848,10 +837,10 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
     }
     else
     {
-        implementation_buffer = getImplementationBuffer(*current_file_segment_it);
+        implementation_buffer = getImplementationBuffer(file_segments->front());

         if (read_type == ReadType::CACHED)
-            (*current_file_segment_it)->incrementHitsCount();
+            file_segments->front().incrementHitsCount();
     }

     chassert(!internal_buffer.empty());
@@ -862,16 +851,16 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
     // the caller doesn't try to use this CachedOnDiskReadBufferFromFile after it threw an exception.)
     swap(*implementation_buffer);

-    auto & file_segment = *current_file_segment_it;
-    auto current_read_range = file_segment->range();
+    auto & file_segment = file_segments->front();
+    const auto & current_read_range = file_segment.range();

     LOG_TEST(
         log,
-        "Current count: {}, position: {}, buffer end: {}, file segment: {}",
-        implementation_buffer->count(),
-        implementation_buffer->getPosition(),
+        "Current read type: {}, read offset: {}, impl offset: {}, file segment: {}",
+        toString(read_type),
+        file_offset_of_buffer_end,
         implementation_buffer->getFileOffsetOfBufferEnd(),
-        file_segment->getInfoForLog());
+        file_segment.getInfoForLog());

     chassert(current_read_range.left <= file_offset_of_buffer_end);
     chassert(current_read_range.right >= file_offset_of_buffer_end);
@@ -889,12 +878,12 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
     }

     auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
-    if (download_current_segment != file_segment->isDownloader())
+    if (download_current_segment != file_segment.isDownloader())
     {
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
             "Incorrect segment state. Having read type: {}, file segment info: {}",
-            toString(read_type), file_segment->getInfoForLog());
+            toString(read_type), file_segment.getInfoForLog());
     }

     if (!result)
@@ -936,7 +925,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
             log,
             "Read {} bytes, read type {}, position: {}, offset: {}, segment end: {}",
             size, toString(read_type), implementation_buffer->getPosition(),
-            implementation_buffer->getFileOffsetOfBufferEnd(), file_segment->range().right);
+            implementation_buffer->getFileOffsetOfBufferEnd(), file_segment.range().right);

         if (read_type == ReadType::CACHED)
         {
@@ -954,20 +943,20 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
         {
             if (download_current_segment)
             {
-                chassert(file_offset_of_buffer_end + size - 1 <= file_segment->range().right);
+                chassert(file_offset_of_buffer_end + size - 1 <= file_segment.range().right);

-                bool success = file_segment->reserve(size);
+                bool success = file_segment.reserve(size);
                 if (success)
                 {
-                    chassert(file_segment->getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
+                    chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));

-                    success = writeCache(implementation_buffer->position(), size, file_offset_of_buffer_end, *file_segment);
+                    success = writeCache(implementation_buffer->position(), size, file_offset_of_buffer_end, file_segment);
                     if (success)
                     {
-                        chassert(file_segment->getCurrentWriteOffset() <= file_segment->range().right + 1);
+                        chassert(file_segment.getCurrentWriteOffset(false) <= file_segment.range().right + 1);
                         chassert(
-                            std::next(current_file_segment_it) == file_segments_holder->file_segments.end()
-                            || file_segment->getCurrentWriteOffset() == implementation_buffer->getFileOffsetOfBufferEnd());
+                            /* last_file_segment */file_segments->size() == 1
+                            || file_segment.getCurrentWriteOffset(false) == implementation_buffer->getFileOffsetOfBufferEnd());

                         LOG_TEST(log, "Successfully written {} bytes", size);

@@ -979,20 +968,13 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
                     }
                     else
                     {
-                        chassert(file_segment->state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
+                        chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
                         LOG_TRACE(log, "Bypassing cache because writeCache method failed");
                     }
                 }
                 else
                 {
                     LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size);
-                    file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
                 }

                 if (!success)
                 {
                     read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
                     download_current_segment = false;
                 }
             }

@@ -1002,7 +984,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
         /// Therefore need to resize to a smaller size. And resize must be done after write into cache.
         /// - If last file segment was read from local fs, then we could read more than
         /// file_segment->range().right, so resize is also needed.
-        if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end())
+        if (file_segments->size() == 1)
         {
             size_t remaining_size_to_read
                 = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1;
@@ -1022,17 +1004,17 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()

         // Not necessary because of the SCOPE_EXIT above, but useful for logging below.
         if (download_current_segment)
-            file_segment->completePartAndResetDownloader();
+            file_segment.completePartAndResetDownloader();

-        chassert(!file_segment->isDownloader());
+        chassert(!file_segment.isDownloader());

         LOG_TEST(
             log,
             "Key: {}. Returning with {} bytes, buffer position: {} (offset: {}, predownloaded: {}), "
-            "buffer available: {}, current range: {}, current offset: {}, file segment state: {}, "
+            "buffer available: {}, current range: {}, file offset of buffer end: {}, impl offset: {}, file segment state: {}, "
             "current write offset: {}, read_type: {}, reading until position: {}, started with offset: {}, "
             "remaining ranges: {}",
-            getHexUIntLowercase(cache_key),
+            cache_key.toString(),
             working_buffer.size(),
             getPosition(),
             offset(),
@@ -1040,12 +1022,13 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
             available(),
             current_read_range.toString(),
             file_offset_of_buffer_end,
-            FileSegment::stateToString(file_segment->state()),
-            file_segment->getCurrentWriteOffset(),
+            implementation_buffer->getFileOffsetOfBufferEnd(),
+            FileSegment::stateToString(file_segment.state()),
+            file_segment.getCurrentWriteOffset(false),
             toString(read_type),
             read_until_position,
             first_offset,
-            file_segments_holder->toString());
+            file_segments->toString());

         if (size == 0 && file_offset_of_buffer_end < read_until_position)
         {
@@ -1064,7 +1047,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
                 cache_file_size ? std::to_string(cache_file_size) : "None",
                 cache_file_path,
                 implementation_buffer->getFileOffsetOfBufferEnd(),
-                file_segment->getInfoForLog());
+                file_segment.getInfoForLog());
         }

         return result;
@@ -1112,13 +1095,13 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
     first_offset = file_offset_of_buffer_end = new_pos;
     resetWorkingBuffer();

-    // if (file_segments_holder && current_file_segment_it != file_segments_holder->file_segments.end())
+    // if (file_segments && current_file_segment_it != file_segments->file_segments.end())
     // {
-    //     auto & file_segments = file_segments_holder->file_segments;
+    //     auto & file_segments = file_segments->file_segments;
     //     LOG_TRACE(
     //         log,
     //         "Having {} file segments to read: {}, current offset: {}",
-    //         file_segments_holder->file_segments.size(), file_segments_holder->toString(), file_offset_of_buffer_end);
+    //         file_segments->file_segments.size(), file_segments->toString(), file_offset_of_buffer_end);

     //     auto it = std::upper_bound(
     //         file_segments.begin(),
@@ -1149,7 +1132,7 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
     //     }
     // }

-    file_segments_holder.reset();
+    file_segments.reset();
     implementation_buffer.reset();
     initialized = false;

@@ -1184,7 +1167,7 @@ void CachedOnDiskReadBufferFromFile::setReadUntilPosition(size_t position)

     file_offset_of_buffer_end = getPosition();
     resetWorkingBuffer();
-    file_segments_holder.reset();
+    file_segments.reset();
     implementation_buffer.reset();
     initialized = false;

@@ -1203,25 +1186,9 @@ off_t CachedOnDiskReadBufferFromFile::getPosition()
     return file_offset_of_buffer_end - available();
 }

-std::optional<size_t> CachedOnDiskReadBufferFromFile::getLastNonDownloadedOffset() const
-{
-    if (!file_segments_holder)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "File segments holder not initialized");
-
-    const auto & file_segments = file_segments_holder->file_segments;
-    for (auto it = file_segments.rbegin(); it != file_segments.rend(); ++it)
-    {
-        const auto & file_segment = *it;
-        if (file_segment->state() != FileSegment::State::DOWNLOADED)
-            return file_segment->range().right;
-    }
-
-    return std::nullopt;
-}
-
 void CachedOnDiskReadBufferFromFile::assertCorrectness() const
 {
-    if (FileCache::isReadOnly()
+    if (!CachedObjectStorage::canUseReadThroughCache()
         && !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache usage is not allowed (query_id: {})", query_id);
 }
@@ -1229,16 +1196,16 @@ void CachedOnDiskReadBufferFromFile::assertCorrectness() const
 String CachedOnDiskReadBufferFromFile::getInfoForLog()
 {
     String current_file_segment_info;
-    if (current_file_segment_it != file_segments_holder->file_segments.end())
-        current_file_segment_info = (*current_file_segment_it)->getInfoForLog();
-    else
+    if (file_segments->empty())
         current_file_segment_info = "None";
+    else
+        current_file_segment_info = file_segments->front().getInfoForLog();

     return fmt::format(
         "Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, read_until_position: {}, "
         "internal buffer end: {}, read_type: {}, last caller: {}, file segment info: {}",
         source_file_path,
-        getHexUIntLowercase(cache_key),
+        cache_key.toString(),
         file_offset_of_buffer_end,
         read_until_position,
         implementation_buffer ? std::to_string(implementation_buffer->getFileOffsetOfBufferEnd()) : "None",
@@ -62,26 +62,29 @@ public:
 private:
     using ImplementationBufferPtr = std::shared_ptr<ReadBufferFromFileBase>;

-    ImplementationBufferPtr getImplementationBuffer(FileSegmentPtr & file_segment);
-    void initialize(size_t offset, size_t size);
-    void assertCorrectness() const;
-
-    ImplementationBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment);
+    /**
+     * Return a list of file segments ordered in ascending order. This list represents
+     * a full contiguous interval (without holes).
+     */
+    FileSegmentsHolderPtr getFileSegments(size_t offset, size_t size) const;
+
+    ImplementationBufferPtr getImplementationBuffer(FileSegment & file_segment);
+
+    ImplementationBufferPtr getReadBufferForFileSegment(FileSegment & file_segment);

     ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const;

-    std::optional<size_t> getLastNonDownloadedOffset() const;
+    ImplementationBufferPtr getRemoteReadBuffer(FileSegment & file_segment, ReadType read_type_);

     bool updateImplementationBufferIfNeeded();

-    void predownload(FileSegmentPtr & file_segment);
+    void predownload(FileSegment & file_segment);

     bool nextImplStep();

+    void initialize(size_t offset, size_t size);
+
+    void assertCorrectness() const;
+
-    std::shared_ptr<ReadBufferFromFileBase> getRemoteFSReadBuffer(FileSegment & file_segment, ReadType read_type_);
-
     size_t getTotalSizeToRead();

     bool completeFileSegmentAndGetNext();
@@ -108,8 +111,7 @@ private:
     /// Remote read buffer, which can only be owned by current buffer.
     FileSegment::RemoteFileReaderPtr remote_file_reader;

-    std::optional<FileSegmentsHolder> file_segments_holder;
-    FileSegments::iterator current_file_segment_it;
+    FileSegmentsHolderPtr file_segments;

     ImplementationBufferPtr implementation_buffer;
     bool initialized = false;
@@ -143,7 +145,7 @@ private:
     CurrentMetrics::Increment metric_increment{CurrentMetrics::FilesystemCacheReadBuffers};
     ProfileEvents::Counters current_file_segment_counters;

-    FileCache::QueryContextHolder query_context_holder;
+    FileCache::QueryContextHolderPtr query_context_holder;

     bool is_persistent;
 };
@@ -50,27 +50,29 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset
             offset, expected_write_offset);
     }

-    auto & file_segments = file_segments_holder.file_segments;
+    FileSegment * file_segment;

-    if (file_segments.empty() || file_segments.back()->isDownloaded())
+    if (file_segments.empty() || file_segments.back().isDownloaded())
     {
-        allocateFileSegment(expected_write_offset, segment_kind);
+        file_segment = &allocateFileSegment(expected_write_offset, segment_kind);
     }
+    else
+    {
+        file_segment = &file_segments.back();
+    }

-    auto & file_segment = file_segments.back();
-
     SCOPE_EXIT({
-        if (file_segments.back()->isDownloader())
-            file_segments.back()->completePartAndResetDownloader();
+        if (file_segments.back().isDownloader())
+            file_segments.back().completePartAndResetDownloader();
     });

     while (size > 0)
     {
-        size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize();
+        size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(false);
         if (available_size == 0)
         {
             completeFileSegment(*file_segment);
-            file_segment = allocateFileSegment(expected_write_offset, segment_kind);
+            file_segment = &allocateFileSegment(expected_write_offset, segment_kind);
             continue;
         }
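A minimal standalone sketch of the chunked-write pattern this loop implements (simplified types and hypothetical names; the real code additionally reserves cache space, writes the bytes, and appends to a filesystem-cache log):

```cpp
#include <algorithm>
#include <cstddef>
#include <deque>

// Each segment has a fixed capacity; `downloaded` is how much is already written.
struct Segment { std::size_t capacity = 0; std::size_t downloaded = 0; };

// Append `size` bytes: fill the tail segment, and allocate a fresh one
// whenever the current segment runs out of capacity.
void writeAcrossSegments(std::deque<Segment> & segments, std::size_t size, std::size_t segment_capacity)
{
    if (segments.empty() || segments.back().downloaded == segments.back().capacity)
        segments.push_back({segment_capacity, 0});

    while (size > 0)
    {
        Segment & tail = segments.back();
        std::size_t available = tail.capacity - tail.downloaded;
        if (available == 0)
        {
            segments.push_back({segment_capacity, 0});  // allocateFileSegment() analogue
            continue;
        }
        std::size_t to_write = std::min(size, available);
        tail.downloaded += to_write;  // reserve()/writeCache() analogue
        size -= to_write;
    }
}
```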
@@ -86,7 +88,6 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset
         bool reserved = file_segment->reserve(size_to_write);
         if (!reserved)
         {
-            file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
             appendFilesystemCacheLog(*file_segment);

             LOG_DEBUG(
@@ -113,11 +114,10 @@ void FileSegmentRangeWriter::finalize()
     if (finalized)
         return;

-    auto & file_segments = file_segments_holder.file_segments;
     if (file_segments.empty())
         return;

-    completeFileSegment(*file_segments.back());
+    completeFileSegment(file_segments.back());
     finalized = true;
 }

@@ -134,24 +134,21 @@ FileSegmentRangeWriter::~FileSegmentRangeWriter()
     }
 }

-FileSegmentPtr & FileSegmentRangeWriter::allocateFileSegment(size_t offset, FileSegmentKind segment_kind)
+FileSegment & FileSegmentRangeWriter::allocateFileSegment(size_t offset, FileSegmentKind segment_kind)
 {
     /**
      * Allocate a new file segment starting `offset`.
      * File segment capacity will equal `max_file_segment_size`, but actual size is 0.
      */

-    std::lock_guard cache_lock(cache->mutex);
-
-    CreateFileSegmentSettings create_settings(segment_kind);
+    CreateFileSegmentSettings create_settings(segment_kind, false);

     /// We set max_file_segment_size to be downloaded,
     /// if we have less size to write, file segment will be resized in complete() method.
-    auto file_segment = cache->createFileSegmentForDownload(
-        key, offset, cache->max_file_segment_size, create_settings, cache_lock);
-
-    auto & file_segments = file_segments_holder.file_segments;
-    return *file_segments.insert(file_segments.end(), file_segment);
+    auto holder = cache->set(key, offset, cache->getMaxFileSegmentSize(), create_settings);
+    chassert(holder->size() == 1);
+    holder->moveTo(file_segments);
+    return file_segments.back();
 }

 void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_segment)
@@ -159,7 +156,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s
     if (cache_log)
     {
         auto file_segment_range = file_segment.range();
-        size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize() - 1;
+        size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize(false) - 1;

         FilesystemCacheLogElement elem
         {
@@ -185,7 +182,7 @@ void FileSegmentRangeWriter::completeFileSegment(FileSegment & file_segment)
     if (file_segment.isDetached() || file_segment.isCompleted())
         return;

-    file_segment.completeWithoutState();
+    file_segment.complete();
     appendFilesystemCacheLog(file_segment);
 }

@@ -224,7 +221,7 @@ void CachedOnDiskWriteBufferFromFile::nextImpl()
     {
         /// If something was already written to cache, remove it.
         cache_writer.reset();
-        cache->removeIfExists(key);
+        cache->removeKeyIfExists(key);

         throw;
     }
@@ -39,7 +39,7 @@ public:
     ~FileSegmentRangeWriter();

 private:
-    FileSegmentPtr & allocateFileSegment(size_t offset, FileSegmentKind segment_kind);
+    FileSegment & allocateFileSegment(size_t offset, FileSegmentKind segment_kind);

     void appendFilesystemCacheLog(const FileSegment & file_segment);

@@ -53,7 +53,7 @@ private:
     String query_id;
     String source_path;

-    FileSegmentsHolder file_segments_holder{};
+    FileSegmentsHolder file_segments{};

     size_t expected_write_offset = 0;
@@ -3,6 +3,7 @@
 #include <IO/SeekableReadBuffer.h>

 #include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
+#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
 #include <Common/logger_useful.h>
 #include <iostream>
 #include <base/hex.h>
@@ -56,7 +57,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c

     if (with_cache)
     {
-        auto cache_key = settings.remote_fs_cache->hash(object_path);
+        auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path);
         return std::make_shared<CachedOnDiskReadBufferFromFile>(
             object_path,
             cache_key,
@@ -43,13 +43,7 @@ DataSourceDescription CachedObjectStorage::getDataSourceDescription() const

 FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const
 {
-    return cache->hash(path);
-}
-
-String CachedObjectStorage::getCachePath(const std::string & path) const
-{
-    FileCache::Key cache_key = getCacheKey(path);
-    return cache->getPathInLocalCache(cache_key);
+    return cache->createKeyForPath(path);
 }

 std::string CachedObjectStorage::generateBlobNameForPath(const std::string & path)
@@ -62,7 +56,7 @@ ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settin
     ReadSettings modified_settings{read_settings};
     modified_settings.remote_fs_cache = cache;

-    if (FileCache::isReadOnly())
+    if (!canUseReadThroughCache())
         modified_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;

     return object_storage->patchSettings(modified_settings);
@@ -122,7 +116,6 @@ std::unique_ptr<WriteBufferFromFileBase> CachedObjectStorage::writeObject( /// N
     if (cache_on_write)
     {
         auto key = getCacheKey(path_key_for_cache);
-        LOG_TEST(log, "Caching file `{}` to `{}` with key {}", object.absolute_path, getCachePath(path_key_for_cache), key.toString());

         return std::make_unique<CachedOnDiskWriteBufferFromFile>(
             std::move(implementation_buffer),
@@ -143,7 +136,7 @@ void CachedObjectStorage::removeCacheIfExists(const std::string & path_key_for_c
         return;

     /// Add try catch?
-    cache->removeIfExists(getCacheKey(path_key_for_cache));
+    cache->removeKeyIfExists(getCacheKey(path_key_for_cache));
 }

 void CachedObjectStorage::removeObject(const StoredObject & object)
@@ -238,4 +231,11 @@ String CachedObjectStorage::getObjectsNamespace() const
     return object_storage->getObjectsNamespace();
 }

+bool CachedObjectStorage::canUseReadThroughCache()
+{
+    return CurrentThread::isInitialized()
+        && CurrentThread::get().getQueryContext()
+        && !CurrentThread::getQueryId().empty();
+}
+
 }
@ -113,11 +113,11 @@ public:
|
||||
|
||||
WriteSettings getAdjustedSettingsFromMetadataFile(const WriteSettings & settings, const std::string & path) const override;
|
||||
|
||||
static bool canUseReadThroughCache();
|
||||
|
||||
private:
|
||||
FileCache::Key getCacheKey(const std::string & path) const;
|
||||
|
||||
String getCachePath(const std::string & path) const;
|
||||
|
||||
ReadSettings patchSettings(const ReadSettings & read_settings) const override;
|
||||
|
||||
ObjectStoragePtr object_storage;
|
||||
|
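Editor's aside: the new canUseReadThroughCache() above gates read-through caching on having a real query context; without one, patchSettings() only lets reads use data already in cache. A self-contained sketch of that decision, with our own stand-in types (this is not the actual CurrentThread API):

#include <iostream>
#include <string>

/// Mirrors the gating logic above: read-through caching requires an
/// initialized thread with a query context and a non-empty query id.
struct ThreadState
{
    bool initialized = false;
    bool has_query_context = false;
    std::string query_id;
};

bool canUseReadThroughCache(const ThreadState & t)
{
    return t.initialized && t.has_query_context && !t.query_id.empty();
}

int main()
{
    ThreadState background{};                 /// e.g. a merge thread: no query context
    ThreadState query{true, true, "q-1"};     /// a normal query thread

    std::cout << std::boolalpha
              << canUseReadThroughCache(background) << ' '   /// false: only existing cache entries may be read
              << canUseReadThroughCache(query) << '\n';      /// true: cache may be populated on read
}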
@ -15,6 +15,7 @@
#include <Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h>
#include <Disks/ObjectStorages/DiskObjectStorageTransaction.h>
#include <Disks/FakeDiskTransaction.h>
#include <Common/ThreadPool.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Interpreters/Context.h>


@ -128,7 +128,7 @@ std::unique_ptr<S3::Client> getClient(
if (uri.key.back() != '/')
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key);

client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100);
client_configuration.endpointOverride = uri.endpoint;

@ -205,6 +205,10 @@ public:
{
return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments);
}
else if (isMap(arguments.at(0).type))
{
return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments);
}
else
return std::make_unique<FunctionToFunctionBaseAdaptor>(
FunctionConcat::create(context), collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }), return_type);

@ -203,11 +203,13 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
return offset_;

if (impl && restricted_seek)
{
throw Exception(
ErrorCodes::CANNOT_SEEK_THROUGH_FILE,
"Seek is allowed only before first read attempt from the buffer (current offset: "
"{}, new offset: {}, reading until position: {}, available: {})",
getPosition(), offset_, read_until_position, available());
ErrorCodes::CANNOT_SEEK_THROUGH_FILE,
"Seek is allowed only before first read attempt from the buffer (current offset: "
"{}, new offset: {}, reading until position: {}, available: {})",
getPosition(), offset_, read_until_position, available());
}

if (whence != SEEK_SET)
throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed.");
File diff suppressed because it is too large
@ -9,43 +9,52 @@
#include <unordered_map>
#include <unordered_set>
#include <boost/functional/hash.hpp>
#include <boost/noncopyable.hpp>

#include <Core/Types.h>
#include <Common/ThreadPool.h>
#include <IO/ReadSettings.h>
#include <Interpreters/Cache/IFileCachePriority.h>
#include <Interpreters/Cache/FileCacheKey.h>

#include <Core/BackgroundSchedulePool.h>
#include <Interpreters/Cache/LRUFileCachePriority.h>
#include <Interpreters/Cache/FileCache_fwd.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Interpreters/Cache/Metadata.h>
#include <Interpreters/Cache/QueryLimit.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
#include <filesystem>


namespace DB
{

namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}

/// Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments.
/// Different caching algorithms are implemented using IFileCachePriority.
class FileCache : private boost::noncopyable
{

friend class FileSegment;
friend class IFileCachePriority;
friend struct FileSegmentsHolder;
friend class FileSegmentRangeWriter;

struct QueryContext;
using QueryContextPtr = std::shared_ptr<QueryContext>;

public:
using Key = DB::FileCacheKey;
using QueryLimit = DB::FileCacheQueryLimit;
using Priority = IFileCachePriority;
using PriorityEntry = IFileCachePriority::Entry;
using PriorityIterator = IFileCachePriority::Iterator;
using PriorityIterationResult = IFileCachePriority::IterationResult;

explicit FileCache(const FileCacheSettings & settings);

~FileCache() = default;
~FileCache();

void initialize();

const String & getBasePath() const { return cache_base_path; }
const String & getBasePath() const;

static Key createKeyForPath(const String & path);

String getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const;

String getPathInLocalCache(const Key & key) const;

/**
* Given an `offset` and `size` representing [offset, offset + size) bytes interval,
@ -58,8 +67,7 @@ public:
* As long as pointers to returned file segments are held
* it is guaranteed that these file segments are not removed from cache.
*/
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
FileSegmentsHolder set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
FileSegmentsHolderPtr getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);

/**
* Segments in returned list are ordered in ascending order and represent a full contiguous
@ -70,53 +78,40 @@ public:
* with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change
* its state (and become DOWNLOADED).
*/
FileSegmentsHolder get(const Key & key, size_t offset, size_t size);
FileSegmentsHolderPtr get(const Key & key, size_t offset, size_t size);

FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);

/// Remove files by `key`. Removes files which might be used at the moment.
void removeIfExists(const Key & key);
void removeKeyIfExists(const Key & key);

/// Remove files by `key`. Will not remove files which are used at the moment.
void removeIfReleasable();

static Key hash(const String & path);

String getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const;

String getPathInLocalCache(const Key & key) const;
void removeAllReleasable();

std::vector<String> tryGetCachePaths(const Key & key);

size_t capacity() const { return max_size; }

size_t getUsedCacheSize() const;

size_t getFileSegmentsNum() const;

static bool isReadOnly();
size_t getMaxFileSegmentSize() const { return max_file_segment_size; }

/**
* Create a file segment of exactly requested size with EMPTY state.
* Throw exception if requested size exceeds max allowed file segment size.
* This method is for protected usage: file segment range writer uses it
* to dynamically allocate file segments.
*/
FileSegmentPtr createFileSegmentForDownload(
const Key & key,
size_t offset,
size_t size,
const CreateFileSegmentSettings & create_settings,
std::lock_guard<std::mutex> & cache_lock);
bool tryReserve(FileSegment & file_segment, size_t size);

FileSegments getSnapshot() const;
FileSegmentsHolderPtr getSnapshot();

/// For debug.
String dumpStructure(const Key & key);
FileSegmentsHolderPtr getSnapshot(const Key & key);

/// Save a query context information, and adopt different cache policies
/// for different queries through the context cache layer.
FileSegmentsHolderPtr dumpQueue();

void cleanup();

void deactivateBackgroundOperations();

/// For per query cache limit.
struct QueryContextHolder : private boost::noncopyable
{
QueryContextHolder(const String & query_id_, FileCache * cache_, QueryContextPtr context_);
QueryContextHolder(const String & query_id_, FileCache * cache_, QueryLimit::QueryContextPtr context_);

QueryContextHolder() = default;

@ -124,198 +119,95 @@ public:

String query_id;
FileCache * cache = nullptr;
QueryContextPtr context;
QueryLimit::QueryContextPtr context;
};
using QueryContextHolderPtr = std::unique_ptr<QueryContextHolder>;
QueryContextHolderPtr getQueryContextHolder(const String & query_id, const ReadSettings & settings);

QueryContextHolder getQueryContextHolder(const String & query_id, const ReadSettings & settings);
CacheGuard::Lock lockCache() { return cache_guard.lock(); }

private:
String cache_base_path;
using KeyAndOffset = FileCacheKeyAndOffset;

const size_t max_size;
const size_t max_element_size;
const size_t max_file_segment_size;

const bool allow_persistent_files;
const size_t enable_cache_hits_threshold;
const bool enable_filesystem_query_cache_limit;
const size_t bypass_cache_threshold = 0;
const size_t delayed_cleanup_interval_ms;

const bool enable_bypass_cache_with_threashold;
const size_t bypass_cache_threashold;

mutable std::mutex mutex;
Poco::Logger * log;

bool is_initialized = false;
std::exception_ptr initialization_exception;
std::exception_ptr init_exception;
std::atomic<bool> is_initialized = false;
mutable std::mutex init_mutex;

void assertInitialized(std::lock_guard<std::mutex> & cache_lock) const;
CacheMetadata metadata;

bool tryReserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
FileCachePriorityPtr main_priority;
mutable CacheGuard cache_guard;

void remove(
Key key,
size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::unique_lock<std::mutex> & segment_lock);

void remove(
FileSegmentPtr file_segment,
std::lock_guard<std::mutex> & cache_lock);

bool isLastFileSegmentHolder(
const Key & key,
size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::unique_lock<std::mutex> & segment_lock);

void reduceSizeToDownloaded(
const Key & key,
size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::unique_lock<std::mutex> & segment_lock);

struct FileSegmentCell : private boost::noncopyable
struct HitsCountStash
{
FileSegmentPtr file_segment;

/// Iterator is put here on first reservation attempt, if successful.
IFileCachePriority::WriteIterator queue_iterator;

/// Pointer to file segment is always held by the cache itself.
/// Apart from pointer in cache, it can be held by cache users, when they call
/// getOrSet(), but cache users always hold it via FileSegmentsHolder.
bool releasable() const { return file_segment.unique(); }

size_t size() const { return file_segment->reserved_size; }

FileSegmentCell(FileSegmentPtr file_segment_, FileCache * cache, std::lock_guard<std::mutex> & cache_lock);

FileSegmentCell(FileSegmentCell && other) noexcept
: file_segment(std::move(other.file_segment)), queue_iterator(std::move(other.queue_iterator)) {}
};

using AccessKeyAndOffset = std::pair<Key, size_t>;
struct KeyAndOffsetHash
{
std::size_t operator()(const AccessKeyAndOffset & key) const
HitsCountStash(size_t hits_threashold_, size_t queue_size_)
: hits_threshold(hits_threashold_), queue(std::make_unique<LRUFileCachePriority>(0, queue_size_))
{
return std::hash<UInt128>()(key.first.key) ^ std::hash<UInt64>()(key.second);
if (!queue_size_)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Queue size for hits queue must be non-zero");
}

const size_t hits_threshold;
FileCachePriorityPtr queue;
using Records = std::unordered_map<KeyAndOffset, PriorityIterator, FileCacheKeyAndOffsetHash>;
Records records;
};

using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
using CachedFiles = std::unordered_map<Key, FileSegmentsByOffset>;
using FileCacheRecords = std::unordered_map<AccessKeyAndOffset, IFileCachePriority::WriteIterator, KeyAndOffsetHash>;
/**
* A HitsCountStash allows to cache certain data only after it reached
* a certain hit rate, e.g. if hit rate is 5, then data is cached on 6th cache hit.
*/
mutable std::unique_ptr<HitsCountStash> stash;
/**
* A QueryLimit allows to control cache write limit per query.
* E.g. if a query needs n bytes from cache, but it has only k bytes, where 0 <= k <= n
* then allowed loaded cache size is std::min(n - k, max_query_cache_size).
*/
FileCacheQueryLimitPtr query_limit;
/**
* A background cleanup task.
* Clears removed cache entries from metadata.
*/
BackgroundSchedulePool::TaskHolder cleanup_task;

CachedFiles files;
std::unique_ptr<IFileCachePriority> main_priority;
void assertInitialized() const;

FileCacheRecords stash_records;
std::unique_ptr<IFileCachePriority> stash_priority;
size_t max_stash_element_size;
void assertCacheCorrectness();

void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
void loadMetadata();

FileSegments getImpl(const Key & key, const FileSegment::Range & range, std::lock_guard<std::mutex> & cache_lock);
FileSegments getImpl(const LockedKey & locked_key, const FileSegment::Range & range) const;

FileSegmentCell * getCell(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock);

/// Returns non-owned pointer to the cell stored in the `files` map.
/// Doesn't reserve any space.
FileSegmentCell * addCell(
const Key & key,
FileSegments splitRangeIntoFileSegments(
LockedKey & locked_key,
size_t offset,
size_t size,
FileSegment::State state,
const CreateFileSegmentSettings & create_settings,
std::lock_guard<std::mutex> & cache_lock);

static void useCell(const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock);

bool tryReserveForMainList(
const Key & key,
size_t offset,
size_t size,
QueryContextPtr query_context,
std::lock_guard<std::mutex> & cache_lock);

FileSegments splitRangeIntoCells(
const Key & key,
size_t offset,
size_t size,
FileSegment::State state,
const CreateFileSegmentSettings & create_settings,
std::lock_guard<std::mutex> & cache_lock);

String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
const CreateFileSegmentSettings & create_settings);

void fillHolesWithEmptyFileSegments(
LockedKey & locked_key,
FileSegments & file_segments,
const Key & key,
const FileSegment::Range & range,
bool fill_with_detached_file_segments,
const CreateFileSegmentSettings & settings,
std::lock_guard<std::mutex> & cache_lock);
const CreateFileSegmentSettings & settings);

size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
KeyMetadata::iterator addFileSegment(
LockedKey & locked_key,
size_t offset,
size_t size,
FileSegment::State state,
const CreateFileSegmentSettings & create_settings,
const CacheGuard::Lock *);

size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;

size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;

void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);

void removeKeyDirectoryIfExists(const Key & key, std::lock_guard<std::mutex> & cache_lock) const;

/// Used to track and control the cache access of each query.
/// Through it, we can realize the processing of different queries by the cache layer.
struct QueryContext
{
FileCacheRecords records;
FileCachePriorityPtr priority;

size_t cache_size = 0;
size_t max_cache_size;

bool skip_download_if_exceeds_query_cache;

QueryContext(size_t max_cache_size_, bool skip_download_if_exceeds_query_cache_)
: max_cache_size(max_cache_size_)
, skip_download_if_exceeds_query_cache(skip_download_if_exceeds_query_cache_) {}

size_t getMaxCacheSize() const { return max_cache_size; }

size_t getCacheSize() const { return cache_size; }

FileCachePriorityPtr getPriority() const { return priority; }

bool isSkipDownloadIfExceed() const { return skip_download_if_exceeds_query_cache; }

void remove(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);

void reserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);

void use(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock);
};

using QueryContextMap = std::unordered_map<String, QueryContextPtr>;
QueryContextMap query_map;

QueryContextPtr getCurrentQueryContext(std::lock_guard<std::mutex> & cache_lock);

QueryContextPtr getQueryContext(const String & query_id, std::lock_guard<std::mutex> & cache_lock);

void removeQueryContext(const String & query_id);

QueryContextPtr getOrSetQueryContext(const String & query_id, const ReadSettings & settings, std::lock_guard<std::mutex> &);

public:
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);

void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);

void assertPriorityCorrectness(std::lock_guard<std::mutex> & cache_lock);
void cleanupThreadFunc();
};

}
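Editor's aside: the QueryLimit comment above defines the per-query write budget as std::min(n - k, max_query_cache_size). A tiny self-contained illustration of that arithmetic (the function and variable names are ours, not ClickHouse's):

#include <algorithm>
#include <cstddef>
#include <iostream>

/// n: bytes the query wants from cache; k: bytes already cached for it.
size_t allowedToWrite(size_t n, size_t k, size_t max_query_cache_size)
{
    return std::min(n - k, max_query_cache_size);
}

int main()
{
    /// Query needs 100 MiB, 30 MiB is already cached, per-query limit is 50 MiB:
    /// it may load min(70 MiB, 50 MiB) = 50 MiB more into the cache.
    std::cout << allowedToWrite(100 << 20, 30 << 20, 50 << 20) << '\n';
}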
31
src/Interpreters/Cache/FileCacheKey.cpp
Normal file
@ -0,0 +1,31 @@
#include "FileCacheKey.h"

#include <base/hex.h>
#include <Common/SipHash.h>
#include <Core/UUID.h>


namespace DB
{

FileCacheKey::FileCacheKey(const std::string & path)
    : key(sipHash128(path.data(), path.size()))
{
}

FileCacheKey::FileCacheKey(const UInt128 & key_)
    : key(key_)
{
}

std::string FileCacheKey::toString() const
{
    return getHexUIntLowercase(key);
}

FileCacheKey FileCacheKey::random()
{
    return FileCacheKey(UUIDHelpers::generateV4().toUnderType());
}

}
@ -1,26 +1,37 @@
#pragma once
#include <Core/Types.h>
#include <base/hex.h>
#include <Core/UUID.h>
#include <fmt/format.h>

namespace DB
{

struct FileCacheKey
{
UInt128 key;
using KeyHash = UInt128;
KeyHash key;

String toString() const { return getHexUIntLowercase(key); }
std::string toString() const;

FileCacheKey() = default;

explicit FileCacheKey(const UInt128 & key_) : key(key_) { }
explicit FileCacheKey(const std::string & path);

static FileCacheKey random() { return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); }
explicit FileCacheKey(const UInt128 & key_);

static FileCacheKey random();

bool operator==(const FileCacheKey & other) const { return key == other.key; }
};

using FileCacheKeyAndOffset = std::pair<FileCacheKey, size_t>;
struct FileCacheKeyAndOffsetHash
{
std::size_t operator()(const FileCacheKeyAndOffset & key) const
{
return std::hash<UInt128>()(key.first.key) ^ std::hash<UInt64>()(key.second);
}
};

}

namespace std
@ -32,3 +43,13 @@ struct hash<DB::FileCacheKey>
};

}

template <>
struct fmt::formatter<DB::FileCacheKey> : fmt::formatter<std::string>
{
template <typename FormatCtx>
auto format(const DB::FileCacheKey & key, FormatCtx & ctx) const
{
return fmt::formatter<std::string>::format(key.toString(), ctx);
}
};
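Editor's aside: the new fmt::formatter specialization above is what lets the refactored code pass a FileCacheKey straight into log messages (see the LRUFileCachePriority changes further down, where key.toString() disappears from LOG_TEST calls). A minimal self-contained sketch of the same pattern, using our own stand-in type rather than the ClickHouse one:

#include <fmt/format.h>
#include <string>

/// Stand-in for DB::FileCacheKey, reduced to what the formatter needs.
struct Key
{
    std::string hex;
    std::string toString() const { return hex; }
};

/// Same pattern as the diff: inherit the std::string formatter and
/// forward the stringified key to it.
template <>
struct fmt::formatter<Key> : fmt::formatter<std::string>
{
    template <typename FormatCtx>
    auto format(const Key & key, FormatCtx & ctx) const
    {
        return fmt::formatter<std::string>::format(key.toString(), ctx);
    }
};

int main()
{
    fmt::print("key: {}\n", Key{"deadbeef"});  /// prints "key: deadbeef"
}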
@ -30,24 +30,26 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &
if (path.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk Cache requires non-empty `path` field (cache base path) in config");

max_elements = config.getUInt64(config_prefix + ".max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS);
max_elements = config.getUInt64(config_prefix + ".max_elements", FILECACHE_DEFAULT_MAX_ELEMENTS);
if (config.has(config_prefix + ".max_file_segment_size"))
max_file_segment_size = parseWithSizeSuffix<uint64_t>(config.getString(config_prefix + ".max_file_segment_size"));
else
max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
max_file_segment_size = FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;

cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false);
enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false);
enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD);
cache_hits_threshold = config.getUInt64(config_prefix + ".cache_hits_threshold", FILECACHE_DEFAULT_HITS_THRESHOLD);

enable_bypass_cache_with_threashold = config.getUInt64(config_prefix + ".enable_bypass_cache_with_threashold", false);

if (config.has(config_prefix + ".bypass_cache_threashold"))
bypass_cache_threashold = parseWithSizeSuffix<uint64_t>(config.getString(config_prefix + ".bypass_cache_threashold"));
else
bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD;
bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD;

do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", false);

delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS);
}

}
@ -13,18 +13,19 @@ struct FileCacheSettings
std::string base_path;

size_t max_size = 0;
size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS;
size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
size_t max_elements = FILECACHE_DEFAULT_MAX_ELEMENTS;
size_t max_file_segment_size = FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;

bool cache_on_write_operations = false;

size_t enable_cache_hits_threshold = REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD;
size_t cache_hits_threshold = FILECACHE_DEFAULT_HITS_THRESHOLD;
bool enable_filesystem_query_cache_limit = false;

bool do_not_evict_index_and_mark_files = true;

bool enable_bypass_cache_with_threashold = false;
size_t bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD;
size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD;
size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS;

void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
};
@ -4,10 +4,11 @@
namespace DB
{

static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
static constexpr int REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD = 0;
static constexpr size_t REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;;
static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0;
static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;
static constexpr size_t FILECACHE_DELAYED_CLEANUP_INTERVAL_MS = 1000 * 60; /// 1 min

class FileCache;
using FileCachePtr = std::shared_ptr<FileCache>;
26
src/Interpreters/Cache/FileCache_fwd_internal.h
Normal file
@ -0,0 +1,26 @@
#pragma once
#include <list>

namespace DB
{

class FileCache;
using FileCachePtr = std::shared_ptr<FileCache>;

class IFileCachePriority;
using FileCachePriorityPtr = std::unique_ptr<IFileCachePriority>;

class FileSegment;
using FileSegmentPtr = std::shared_ptr<FileSegment>;
using FileSegments = std::list<FileSegmentPtr>;

struct FileSegmentMetadata;
using FileSegmentMetadataPtr = std::shared_ptr<FileSegmentMetadata>;

struct LockedKey;
using LockedKeyPtr = std::shared_ptr<LockedKey>;

struct KeyMetadata;
using KeyMetadataPtr = std::shared_ptr<KeyMetadata>;

}
File diff suppressed because it is too large
@ -2,13 +2,16 @@

#include <boost/noncopyable.hpp>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Interpreters/Cache/Guards.h>

#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <IO/OpenedFileCache.h>
#include <base/getThreadId.h>
#include <list>
#include <Interpreters/Cache/IFileCachePriority.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
#include <queue>


@ -22,14 +25,8 @@ extern const Metric CacheFileSegments;
namespace DB
{

class FileCache;
class ReadBufferFromFileBase;

class FileSegment;
using FileSegmentPtr = std::shared_ptr<FileSegment>;
using FileSegments = std::list<FileSegmentPtr>;


/*
* FileSegmentKind is used to specify the eviction policy for file segments.
*/
@ -61,17 +58,13 @@ struct CreateFileSegmentSettings
CreateFileSegmentSettings() = default;

explicit CreateFileSegmentSettings(FileSegmentKind kind_, bool unbounded_ = false)
: kind(kind_), unbounded(unbounded_)
{}
: kind(kind_), unbounded(unbounded_) {}
};

class FileSegment : private boost::noncopyable, public std::enable_shared_from_this<FileSegment>
{

friend class FileCache;
friend struct FileSegmentsHolder;
friend class FileSegmentRangeWriter;
friend class StorageSystemFilesystemCache;
friend struct LockedKey;
friend class FileCache; /// Because of reserved_size in tryReserve().

public:
using Key = FileCacheKey;
@ -79,6 +72,7 @@ public:
using LocalCacheWriterPtr = std::unique_ptr<WriteBufferFromFile>;
using Downloader = std::string;
using DownloaderId = std::string;
using Priority = IFileCachePriority;

enum class State
{
@ -111,18 +105,20 @@ public:
* If file segment cannot possibly be downloaded (first space reservation attempt failed), mark
* this file segment as out of cache scope.
*/
SKIP_CACHE,
DETACHED,
};

FileSegment(
const Key & key_,
size_t offset_,
size_t size_,
const Key & key_,
FileCache * cache_,
State download_state_,
const CreateFileSegmentSettings & create_settings);
const CreateFileSegmentSettings & create_settings = {},
FileCache * cache_ = nullptr,
std::weak_ptr<KeyMetadata> key_metadata_ = std::weak_ptr<KeyMetadata>(),
Priority::Iterator queue_iterator_ = Priority::Iterator{});

~FileSegment();
~FileSegment() = default;

State state() const;

@ -158,11 +154,10 @@ public:
size_t offset() const { return range().left; }

FileSegmentKind getKind() const { return segment_kind; }
bool isPersistent() const { return segment_kind == FileSegmentKind::Persistent; }
bool isUnbound() const { return is_unbound; }

using UniqueId = std::pair<FileCacheKey, size_t>;
UniqueId getUniqueId() const { return std::pair(key(), offset()); }
bool isPersistent() const { return segment_kind == FileSegmentKind::Persistent; }

bool isUnbound() const { return is_unbound; }

String getPathInLocalCache() const;

@ -177,7 +172,7 @@ public:
DownloaderId getDownloader() const;

/// Wait for the change of state from DOWNLOADING to any other.
State wait();
State wait(size_t offset);

bool isDownloaded() const;

@ -187,11 +182,13 @@ public:

void incrementHitsCount() { ++hits_count; }

size_t getCurrentWriteOffset() const;
size_t getCurrentWriteOffset(bool sync) const;

size_t getFirstNonDownloadedOffset() const;
size_t getFirstNonDownloadedOffset(bool sync) const;

size_t getDownloadedSize() const;
size_t getDownloadedSize(bool sync) const;

size_t getReservedSize() const;

/// Now detached status can be used in the following cases:
/// 1. there is only 1 remaining file segment holder
@ -207,15 +204,43 @@ public:
/// 2. Detached file segment can still be held by some cache users, but its state became
/// immutable at the point it was detached, any non-const / stateful method will throw an
/// exception.
void detach(std::lock_guard<std::mutex> & cache_lock, std::unique_lock<std::mutex> & segment_lock);
void detach(const FileSegmentGuard::Lock &, const LockedKey &);

static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & cache_lock);
static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment);

bool isDetached() const;

bool isCompleted() const;
/// File segment has a completed state, if this state is final and
/// is not going to be changed. Completed states: DOWNLOADED, DETACHED.
bool isCompleted(bool sync = false) const;

void assertCorrectness() const;
void use();

/**
* ========== Methods used by `cache` ========================
*/

FileSegmentGuard::Lock lock() const { return segment_guard.lock(); }

Priority::Iterator getQueueIterator() const;

void setQueueIterator(Priority::Iterator iterator);

KeyMetadataPtr tryGetKeyMetadata() const;

KeyMetadataPtr getKeyMetadata() const;

bool assertCorrectness() const;

/**
* ========== Methods that must do cv.notify() ==================
*/

void complete();

void completePartAndResetDownloader();

void resetDownloader();

/**
* ========== Methods for _only_ file segment's `downloader` ==================
@ -233,16 +258,6 @@ public:
/// Write data into reserved space.
void write(const char * from, size_t size, size_t offset);

/// Complete file segment with a certain state.
void completeWithState(State state);

void completeWithoutState();

/// Complete file segment's part which was last written.
void completePartAndResetDownloader();

void resetDownloader();

// Invariant: if state() != DOWNLOADING and remote file reader is present, the reader's
// available() == 0, and getFileOffsetOfBufferEnd() == our getCurrentWriteOffset().
//
@ -252,125 +267,112 @@ public:

RemoteFileReaderPtr extractRemoteFileReader();

void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);

void resetRemoteFileReader();

void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);

void setDownloadedSize(size_t delta);

LocalCacheWriterPtr detachWriter();

private:
size_t getFirstNonDownloadedOffsetUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
size_t getCurrentWriteOffsetUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
size_t getDownloadedSizeUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
String getDownloaderUnlocked(const FileSegmentGuard::Lock &) const;
bool isDownloaderUnlocked(const FileSegmentGuard::Lock & segment_lock) const;
void resetDownloaderUnlocked(const FileSegmentGuard::Lock &);

String getInfoForLogUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void setDownloadState(State state, const FileSegmentGuard::Lock &);
void resetDownloadingStateUnlocked(const FileSegmentGuard::Lock &);
void setDetachedState(const FileSegmentGuard::Lock &);

String getDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void resetDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock);
void resetDownloadingStateUnlocked(std::unique_lock<std::mutex> & segment_lock);
String getInfoForLogUnlocked(const FileSegmentGuard::Lock &) const;

void setDownloadState(State state);
void setDownloadedUnlocked(const FileSegmentGuard::Lock &);
void setDownloadFailedUnlocked(const FileSegmentGuard::Lock &);

void setDownloadedUnlocked(std::unique_lock<std::mutex> & segment_lock);
void setDownloadFailedUnlocked(std::unique_lock<std::mutex> & segment_lock);
void setDownloadedSizeUnlocked(std::unique_lock<std::mutex> & /* download_lock */, size_t delta);

bool hasFinalizedStateUnlocked(std::unique_lock<std::mutex> & segment_lock) const;

bool isDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock) const;

bool isDetached(std::unique_lock<std::mutex> & /* segment_lock */) const { return is_detached; }
void detachAssumeStateFinalized(std::unique_lock<std::mutex> & segment_lock);
[[noreturn]] void throwIfDetachedUnlocked(std::unique_lock<std::mutex> & segment_lock) const;

void assertDetachedStatus(std::unique_lock<std::mutex> & segment_lock) const;
void assertNotDetached() const;
void assertNotDetachedUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void assertIsDownloaderUnlocked(const std::string & operation, std::unique_lock<std::mutex> & segment_lock) const;
void assertCorrectnessUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void assertNotDetachedUnlocked(const FileSegmentGuard::Lock &) const;
void assertIsDownloaderUnlocked(const std::string & operation, const FileSegmentGuard::Lock &) const;
bool assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) const;

/// completeWithoutStateUnlocked() is called from destructor of FileSegmentsHolder.
/// Function might check if the caller of the method
/// is the last alive holder of the segment. Therefore, completion and destruction
/// of the file segment pointer must be done under the same cache mutex.
void completeWithoutStateUnlocked(std::lock_guard<std::mutex> & cache_lock);
void completeBasedOnCurrentState(std::lock_guard<std::mutex> & cache_lock, std::unique_lock<std::mutex> & segment_lock);

void completePartAndResetDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock);

void wrapWithCacheInfo(Exception & e, const String & message, std::unique_lock<std::mutex> & segment_lock) const;
LockedKeyPtr lockKeyMetadata(bool assert_exists = true) const;

Key file_key;
Range segment_range;
const FileSegmentKind segment_kind;
/// Size of the segment is not known until it is downloaded and
/// can be bigger than max_file_segment_size.
const bool is_unbound = false;

State download_state;

/// The one who prepares the download
DownloaderId downloader_id;
std::atomic<State> download_state;
DownloaderId downloader_id; /// The one who prepares the download

RemoteFileReaderPtr remote_file_reader;
LocalCacheWriterPtr cache_writer;
bool detached_writer = false;

/// downloaded_size should always be less or equal to reserved_size
size_t downloaded_size = 0;
size_t reserved_size = 0;

/// global locking order rule:
/// 1. cache lock
/// 2. segment lock

mutable std::mutex mutex;
std::condition_variable cv;

/// Protects downloaded_size access with actual write into fs.
/// downloaded_size is not protected by download_mutex in methods which
/// can never be run in parallel to FileSegment::write() method
/// as downloaded_size is updated only in FileSegment::write() method.
/// Such methods are identified by isDownloader() check at their start,
/// e.g. they are executed strictly by the same thread, sequentially.
std::atomic<size_t> downloaded_size = 0;
std::atomic<size_t> reserved_size = 0;
mutable std::mutex download_mutex;

Key file_key;
mutable FileSegmentGuard segment_guard;
std::weak_ptr<KeyMetadata> key_metadata;
mutable Priority::Iterator queue_iterator; /// Iterator is put here on first reservation attempt, if successful.
FileCache * cache;
std::condition_variable cv;

Poco::Logger * log;

/// "detached" file segment means that it is not owned by cache ("detached" from cache).
/// In general case, all file segments are owned by cache.
bool is_detached = false;
bool is_completed = false;

bool is_downloaded = false;

std::atomic<size_t> hits_count = 0; /// cache hits.
std::atomic<size_t> ref_count = 0; /// Used for getting snapshot state

FileSegmentKind segment_kind;

/// Size of the segment is not known until it is downloaded and can be bigger than max_file_segment_size.
bool is_unbound = false;

CurrentMetrics::Increment metric_increment{CurrentMetrics::CacheFileSegments};
};


struct FileSegmentsHolder : private boost::noncopyable
{
FileSegmentsHolder() = default;

explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}

FileSegmentsHolder(FileSegmentsHolder && other) noexcept : file_segments(std::move(other.file_segments)) {}

void reset();
bool empty() const { return file_segments.empty(); }
explicit FileSegmentsHolder(FileSegments && file_segments_, bool complete_on_dtor_ = true)
: file_segments(std::move(file_segments_)), complete_on_dtor(complete_on_dtor_) {}

~FileSegmentsHolder();

bool empty() const { return file_segments.empty(); }

size_t size() const { return file_segments.size(); }

String toString();

void popFront() { completeAndPopFrontImpl(); }

FileSegment & front() { return *file_segments.front(); }

FileSegment & back() { return *file_segments.back(); }

FileSegment & add(FileSegmentPtr && file_segment)
{
file_segments.push_back(file_segment);
return *file_segments.back();
}

FileSegments::iterator begin() { return file_segments.begin(); }
FileSegments::iterator end() { return file_segments.end(); }

FileSegments::const_iterator begin() const { return file_segments.begin(); }
FileSegments::const_iterator end() const { return file_segments.end(); }

void moveTo(FileSegmentsHolder & holder)
{
holder.file_segments.insert(holder.file_segments.end(), file_segments.begin(), file_segments.end());
file_segments.clear();
}

private:
FileSegments file_segments{};
const bool complete_on_dtor = true;

FileSegments::iterator completeAndPopFrontImpl();
};

using FileSegmentsHolderPtr = std::unique_ptr<FileSegmentsHolder>;

}
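Editor's aside: the refactored FileSegmentsHolder above owns a list of shared segment pointers and completes whatever it still holds when it is destroyed. A self-contained analogue of that RAII shape, with our own minimal stand-in types (not the ClickHouse classes):

#include <iostream>
#include <list>
#include <memory>

/// Minimal stand-ins for FileSegment / FileSegmentsHolder from the diff above.
struct Segment
{
    int left, right;
    void complete() { std::cout << "completed [" << left << ", " << right << "]\n"; }
};

struct SegmentsHolder
{
    std::list<std::shared_ptr<Segment>> segments;

    /// Like ~FileSegmentsHolder(): finalize every segment still held.
    ~SegmentsHolder()
    {
        for (auto & segment : segments)
            segment->complete();
    }
};

int main()
{
    SegmentsHolder holder;
    holder.segments.push_back(std::make_shared<Segment>(Segment{0, 99}));
    holder.segments.push_back(std::make_shared<Segment>(Segment{100, 199}));
    /// Holder goes out of scope here and completes both segments.
}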
117
src/Interpreters/Cache/Guards.h
Normal file
@ -0,0 +1,117 @@
#pragma once
#include <mutex>
#include <Interpreters/Cache/FileCache_fwd.h>
#include <boost/noncopyable.hpp>
#include <map>

namespace DB
{
/**
 * FileCache::get/getOrSet/set
 * 1. CacheMetadataGuard::Lock (take key lock and release metadata lock)
 * 2. KeyGuard::Lock (hold till the end of the method)
 *
 * FileCache::tryReserve
 * 1. CacheGuard::Lock
 * 2. KeyGuard::Lock (taken without metadata lock)
 * 3. any number of KeyGuard::Lock's for files which are going to be evicted (taken via metadata lock)
 *
 * FileCache::removeIfExists
 * 1. CacheGuard::Lock
 * 2. KeyGuard::Lock (taken via metadata lock)
 * 3. FileSegmentGuard::Lock
 *
 * FileCache::removeAllReleasable
 * 1. CacheGuard::Lock
 * 2. any number of KeyGuard::Lock's locks (taken via metadata lock), but at a moment of time only one key lock can be held
 * 3. FileSegmentGuard::Lock
 *
 * FileCache::getSnapshot (for all cache)
 * 1. metadata lock
 * 2. any number of KeyGuard::Lock's locks (taken via metadata lock), but at a moment of time only one key lock can be held
 * 3. FileSegmentGuard::Lock
 *
 * FileCache::getSnapshot(key)
 * 1. KeyGuard::Lock (taken via metadata lock)
 * 2. FileSegmentGuard::Lock
 *
 * FileSegment::complete
 * 1. CacheGuard::Lock
 * 2. KeyGuard::Lock (taken without metadata lock)
 * 3. FileSegmentGuard::Lock
 *
 * Rules:
 * 1. Priority of locking: CacheGuard::Lock > CacheMetadataGuard::Lock > KeyGuard::Lock > FileSegmentGuard::Lock
 * 2. If we take more than one key lock at a moment of time, we need to take CacheGuard::Lock (example: tryReserve())
 *
 *
 *                _CacheGuard_
 * 1. FileCache::tryReserve
 * 2. FileCache::removeIfExists(key)
 * 3. FileCache::removeAllReleasable
 * 4. FileSegment::complete
 *
 *                _KeyGuard_                 _CacheMetadataGuard_
 * 1. all from CacheGuard                    1. getOrSet/get/set
 * 2. getOrSet/get/Set
 *
 * *This table does not include locks taken for introspection and system tables.
 */

/**
 * Cache priority queue guard.
 */
struct CacheGuard : private boost::noncopyable
{
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    Lock lock() { return Lock(mutex); }
    std::mutex mutex;
};

/**
 * Guard for cache metadata.
 */
struct CacheMetadataGuard : private boost::noncopyable
{
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    Lock lock() { return Lock(mutex); }
    std::mutex mutex;
};

/**
 * Key guard. A separate guard for each cache key.
 */
struct KeyGuard : private boost::noncopyable
{
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    Lock lock() { return Lock(mutex); }
    std::mutex mutex;
};

/**
 * Guard for a file segment.
 */
struct FileSegmentGuard : private boost::noncopyable
{
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    Lock lock() { return Lock(mutex); }
    std::mutex mutex;
};

}
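Editor's aside: the point of giving every guard its own Lock type, as Guards.h does above, is that a function can demand proof of the right lock at compile time by taking that Lock by reference. A minimal self-contained sketch of the idea (our names, independent of ClickHouse internals):

#include <mutex>

/// Same pattern as Guards.h: a distinct Lock type per guard level, so that
/// functions can require the correct lock by parameter type.
struct CacheGuard
{
    struct Lock : std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & m) : std::unique_lock<std::mutex>(m) {}
    };
    Lock lock() { return Lock(mutex); }
    std::mutex mutex;
};

/// Only callable while the cache-wide lock is held: the caller must pass the
/// Lock it obtained, which documents and enforces the locking order.
void evictOneEntry(const CacheGuard::Lock &) { /* ... */ }

int main()
{
    CacheGuard cache_guard;
    auto lock = cache_guard.lock();  /// 1. CacheGuard::Lock first, per the documented order
    evictOneEntry(lock);
}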
@ -5,33 +5,35 @@
#include <Core/Types.h>
#include <Common/Exception.h>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Interpreters/Cache/Guards.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>

namespace DB
{

class IFileCachePriority;
using FileCachePriorityPtr = std::shared_ptr<IFileCachePriority>;

/// IFileCachePriority is used to maintain the priority of cached data.
class IFileCachePriority
class IFileCachePriority : private boost::noncopyable
{
public:
class IIterator;
using Key = FileCacheKey;
using ReadIterator = std::unique_ptr<const IIterator>;
using WriteIterator = std::shared_ptr<IIterator>;
using KeyAndOffset = FileCacheKeyAndOffset;

struct FileCacheRecord
struct Entry
{
Key key;
size_t offset;
size_t size;
size_t hits = 0;
Entry(const Key & key_, size_t offset_, size_t size_, KeyMetadataPtr key_metadata_)
: key(key_), offset(offset_), size(size_), key_metadata(key_metadata_) {}

FileCacheRecord(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) { }
Entry(const Entry & other)
: key(other.key), offset(other.offset), size(other.size.load()), hits(other.hits), key_metadata(other.key_metadata) {}

const Key key;
const size_t offset;
std::atomic<size_t> size;
size_t hits = 0;
const KeyMetadataPtr key_metadata;
};

/// It provides an iterator to traverse the cache priority. Under normal circumstances,
/// Provides an iterator to traverse the cache priority. Under normal circumstances,
/// the iterator can only return the records that have been directly swapped out.
/// For example, in the LRU algorithm, it can traverse all records, but in the LRU-K, it
/// can only traverse the records in the low priority queue.
@ -40,56 +42,54 @@ public:
public:
virtual ~IIterator() = default;

virtual const Key & key() const = 0;
virtual size_t use(const CacheGuard::Lock &) = 0;

virtual size_t offset() const = 0;
virtual std::shared_ptr<IIterator> remove(const CacheGuard::Lock &) = 0;

virtual size_t size() const = 0;
virtual const Entry & getEntry() const = 0;

virtual size_t hits() const = 0;
virtual Entry & getEntry() = 0;

/// Point the iterator to the next higher priority cache record.
virtual void next() const = 0;
virtual void annul() = 0;

virtual bool valid() const = 0;

/// Mark a cache record as recently used, it will update the priority
/// of the cache record according to different cache algorithms.
virtual void use(std::lock_guard<std::mutex> &) = 0;

/// Deletes an existing cached record. And to avoid pointer suspension
/// the iterator should automatically point to the next record.
virtual void removeAndGetNext(std::lock_guard<std::mutex> &) = 0;

virtual void updateSize(int64_t, std::lock_guard<std::mutex> &) = 0;
virtual void updateSize(int64_t size) = 0;
};

public:
using Iterator = std::shared_ptr<IIterator>;
using ConstIterator = std::shared_ptr<const IIterator>;

enum class IterationResult
{
BREAK,
CONTINUE,
REMOVE_AND_CONTINUE,
};
using IterateFunc = std::function<IterationResult(LockedKey &, FileSegmentMetadataPtr)>;

IFileCachePriority(size_t max_size_, size_t max_elements_) : max_size(max_size_), max_elements(max_elements_) {}

virtual ~IFileCachePriority() = default;

/// Add a cache record that did not exist before, and throw a
/// logical exception if the cache block already exists.
virtual WriteIterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock) = 0;
size_t getElementsLimit() const { return max_elements; }

/// This method is used for assertions in debug mode. So we do not care about complexity here.
/// Query whether a cache record exists. If it exists, return true. If not, return false.
virtual bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) = 0;
size_t getSizeLimit() const { return max_size; }

virtual void removeAll(std::lock_guard<std::mutex> & cache_lock) = 0;
virtual size_t getSize(const CacheGuard::Lock &) const = 0;

/// Returns an iterator pointing to the lowest priority cached record.
/// We can traverse all cached records through the iterator's next().
virtual ReadIterator getLowestPriorityReadIterator(std::lock_guard<std::mutex> & cache_lock) = 0;
virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0;

/// The same as getLowestPriorityReadIterator(), but it is writeable.
virtual WriteIterator getLowestPriorityWriteIterator(std::lock_guard<std::mutex> & cache_lock) = 0;
virtual Iterator add(
KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) = 0;

virtual size_t getElementsNum(std::lock_guard<std::mutex> & cache_lock) const = 0;
virtual void pop(const CacheGuard::Lock &) = 0;

size_t getCacheSize(std::lock_guard<std::mutex> &) const { return cache_size; }
virtual void removeAll(const CacheGuard::Lock &) = 0;

protected:
size_t max_cache_size = 0;
size_t cache_size = 0;
virtual void iterate(IterateFunc && func, const CacheGuard::Lock &) = 0;

private:
const size_t max_size = 0;
const size_t max_elements = 0;
};

};
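Editor's aside: the new iterate()/IterateFunc contract above replaces the explicit read/write iterators: the priority queue walks its entries lowest-priority first, and the callback decides per entry whether to stop, keep, or evict. A self-contained analogue of that protocol, with our own reduced types (the real callback also receives LockedKey and segment metadata, as in the LRU implementation below):

#include <cstddef>
#include <functional>
#include <list>

/// Self-contained analogue of the IterationResult/IterateFunc protocol above.
enum class IterationResult { BREAK, CONTINUE, REMOVE_AND_CONTINUE };

struct Entry { size_t size; };

using IterateFunc = std::function<IterationResult(Entry &)>;

/// Walks entries lowest-priority first; the callback decides per entry.
void iterate(std::list<Entry> & queue, IterateFunc && func)
{
    for (auto it = queue.begin(); it != queue.end();)
    {
        switch (func(*it))
        {
            case IterationResult::BREAK:
                return;
            case IterationResult::CONTINUE:
                ++it;
                break;
            case IterationResult::REMOVE_AND_CONTINUE:
                it = queue.erase(it);
                break;
        }
    }
}

int main()
{
    std::list<Entry> queue{{1024}, {2048}, {4096}};
    size_t needed = 3000, reclaimed = 0;
    iterate(queue, [&](Entry & e)
    {
        if (reclaimed >= needed)
            return IterationResult::BREAK;
        reclaimed += e.size;
        return IterationResult::REMOVE_AND_CONTINUE;
    });
    /// queue now holds only {4096}; reclaimed == 3072.
}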
@ -1,5 +1,7 @@
|
||||
#include <Interpreters/Cache/LRUFileCachePriority.h>
|
||||
#include <Interpreters/Cache/FileCache.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
namespace CurrentMetrics
|
||||
@ -16,8 +18,13 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
IFileCachePriority::WriteIterator LRUFileCachePriority::add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> &)
|
||||
IFileCachePriority::Iterator LRUFileCachePriority::add(
|
||||
KeyMetadataPtr key_metadata,
|
||||
size_t offset,
|
||||
size_t size,
|
||||
const CacheGuard::Lock &)
|
||||
{
|
||||
const auto & key = key_metadata->key;
|
||||
#ifndef NDEBUG
|
||||
for (const auto & entry : queue)
|
||||
{
|
||||
@ -25,40 +32,56 @@ IFileCachePriority::WriteIterator LRUFileCachePriority::add(const Key & key, siz
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Attempt to add duplicate queue entry to queue. (Key: {}, offset: {}, size: {})",
|
||||
entry.key.toString(), entry.offset, entry.size);
|
||||
entry.key, entry.offset, entry.size);
|
||||
}
|
||||
#endif
|
||||
|
||||
auto iter = queue.insert(queue.end(), FileCacheRecord(key, offset, size));
|
||||
cache_size += size;
|
||||
const auto & size_limit = getSizeLimit();
|
||||
if (size_limit && current_size + size > size_limit)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Not enough space to add {}:{} with size {}: current size: {}/{}",
|
||||
key, offset, size, current_size, getSizeLimit());
|
||||
}
|
||||
|
||||
current_size += size;
|
||||
|
||||
auto iter = queue.insert(queue.end(), Entry(key, offset, size, key_metadata));
|
||||
|
||||
CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size);
|
||||
CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements);
|
||||
|
||||
LOG_TEST(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset);
|
||||
LOG_TEST(log, "Added entry into LRU queue, key: {}, offset: {}", key, offset);
|
||||
|
||||
return std::make_shared<LRUFileCacheIterator>(this, iter);
|
||||
}
|
||||
|
||||
bool LRUFileCachePriority::contains(const Key & key, size_t offset, std::lock_guard<std::mutex> &)
|
||||
void LRUFileCachePriority::removeAll(const CacheGuard::Lock &)
|
||||
{
|
||||
for (const auto & record : queue)
|
||||
{
|
||||
if (key == record.key && offset == record.offset)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void LRUFileCachePriority::removeAll(std::lock_guard<std::mutex> &)
|
||||
{
|
||||
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, cache_size);
|
||||
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, current_size);
|
||||
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, queue.size());
|
||||
|
||||
LOG_TEST(log, "Removed all entries from LRU queue");
|
||||
|
||||
queue.clear();
|
||||
cache_size = 0;
|
||||
current_size = 0;
|
||||
}
|
||||
|
||||
void LRUFileCachePriority::pop(const CacheGuard::Lock &)
|
||||
{
|
||||
remove(queue.begin());
|
||||
}
|
||||
|
||||
LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it)
|
||||
{
|
||||
current_size -= it->size;
|
||||
|
||||
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, it->size);
|
||||
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements);
|
||||
|
||||
LOG_TEST(log, "Removed entry from LRU queue, key: {}, offset: {}", it->key, it->offset);
|
||||
return queue.erase(it);
|
||||
}
|
||||
|
||||
LRUFileCachePriority::LRUFileCacheIterator::LRUFileCacheIterator(
|
||||
@ -67,36 +90,67 @@ LRUFileCachePriority::LRUFileCacheIterator::LRUFileCacheIterator(
|
||||
{
|
||||
}
|
||||
|
||||
IFileCachePriority::ReadIterator LRUFileCachePriority::getLowestPriorityReadIterator(std::lock_guard<std::mutex> &)
|
||||
void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock &)
|
||||
{
|
||||
return std::make_unique<const LRUFileCacheIterator>(this, queue.begin());
|
||||
for (auto it = queue.begin(); it != queue.end();)
|
||||
{
|
||||
auto locked_key = it->key_metadata->tryLock();
|
||||
if (!locked_key || it->size == 0)
|
||||
{
|
||||
it = remove(it);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto metadata = locked_key->tryGetByOffset(it->offset);
|
||||
if (!metadata)
|
||||
{
|
||||
it = remove(it);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (metadata->size() != it->size)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Mismatch of file segment size in file segment metadata and priority queue: {} != {} ({})",
|
||||
it->size, metadata->size(), metadata->file_segment->getInfoForLog());
|
||||
}
|
||||
|
||||
auto result = func(*locked_key, metadata);
|
||||
switch (result)
|
||||
{
|
||||
case IterationResult::BREAK:
|
||||
{
|
||||
return;
|
||||
}
|
||||
case IterationResult::CONTINUE:
|
||||
{
|
||||
++it;
|
||||
break;
|
||||
}
|
||||
case IterationResult::REMOVE_AND_CONTINUE:
|
||||
{
|
||||
it = remove(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
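The BREAK / CONTINUE / REMOVE_AND_CONTINUE contract above is what lets callers implement eviction as a callback. A self-contained model of that contract (not the real FileCache types), with an example callback that frees a requested number of bytes starting from the lowest-priority end:

#include <cstddef>
#include <functional>
#include <list>

enum class IterationResult { BREAK, CONTINUE, REMOVE_AND_CONTINUE };

struct EntrySketch { size_t size = 0; };

// Walk from lowest to highest priority; the callback decides per entry,
// following the same three-way contract as iterate() above.
inline void iterateSketch(std::list<EntrySketch> & queue,
                          const std::function<IterationResult(EntrySketch &)> & func)
{
    for (auto it = queue.begin(); it != queue.end();)
    {
        switch (func(*it))
        {
            case IterationResult::BREAK:
                return;
            case IterationResult::CONTINUE:
                ++it;
                break;
            case IterationResult::REMOVE_AND_CONTINUE:
                it = queue.erase(it);
                break;
        }
    }
}

// Example callback: evict from the head until `needed` bytes are freed.
inline size_t evictSketch(std::list<EntrySketch> & queue, size_t needed)
{
    size_t freed = 0;
    iterateSketch(queue, [&](EntrySketch & e)
    {
        if (freed >= needed)
            return IterationResult::BREAK;
        freed += e.size;
        return IterationResult::REMOVE_AND_CONTINUE;
    });
    return freed;
}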
|
||||
|
||||
IFileCachePriority::WriteIterator LRUFileCachePriority::getLowestPriorityWriteIterator(std::lock_guard<std::mutex> &)
|
||||
LRUFileCachePriority::Iterator LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock &)
|
||||
{
|
||||
return std::make_shared<LRUFileCacheIterator>(this, queue.begin());
|
||||
return std::make_shared<LRUFileCacheIterator>(cache_priority, cache_priority->remove(queue_iter));
|
||||
}
|
||||
|
||||
size_t LRUFileCachePriority::getElementsNum(std::lock_guard<std::mutex> &) const
|
||||
void LRUFileCachePriority::LRUFileCacheIterator::annul()
|
||||
{
|
||||
return queue.size();
|
||||
cache_priority->current_size -= queue_iter->size;
|
||||
queue_iter->size = 0;
|
||||
}
|
||||
|
||||
void LRUFileCachePriority::LRUFileCacheIterator::removeAndGetNext(std::lock_guard<std::mutex> &)
|
||||
void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size)
|
||||
{
|
||||
cache_priority->cache_size -= queue_iter->size;
|
||||
|
||||
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, queue_iter->size);
|
||||
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements);
|
||||
|
||||
LOG_TEST(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset);
|
||||
|
||||
queue_iter = cache_priority->queue.erase(queue_iter);
|
||||
}
|
||||
|
||||
void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size, std::lock_guard<std::mutex> &)
|
||||
{
|
||||
cache_priority->cache_size += size;
|
||||
cache_priority->current_size += size;
|
||||
|
||||
if (size > 0)
|
||||
CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size);
|
||||
@ -105,14 +159,14 @@ void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size, std::l
|
||||
|
||||
queue_iter->size += size;
|
||||
|
||||
chassert(queue_iter->size > 0);
|
||||
chassert(cache_priority->cache_size >= 0);
|
||||
chassert(cache_priority->current_size >= 0);
|
||||
chassert(queue_iter->size >= 0);
|
||||
}
|
||||
|
||||
void LRUFileCachePriority::LRUFileCacheIterator::use(std::lock_guard<std::mutex> &)
|
||||
size_t LRUFileCachePriority::LRUFileCacheIterator::use(const CacheGuard::Lock &)
|
||||
{
|
||||
queue_iter->hits++;
|
||||
cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, queue_iter);
|
||||
return ++queue_iter->hits;
|
||||
}
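use() relies on std::list::splice to move the touched node to the back of the queue in O(1) without invalidating any stored iterators, which is why the priority can hand out list iterators as long-lived handles. The idiom in isolation:

#include <list>

// Re-mark `it` as most recently used: splice moves the list node, not the
// element, so no memory is reallocated and `it` stays valid afterwards.
template <typename T>
void touch(std::list<T> & queue, typename std::list<T>::iterator it)
{
    queue.splice(queue.end(), queue, it);
}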
|
||||
|
||||
};
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
#include <list>
|
||||
#include <Interpreters/Cache/IFileCachePriority.h>
|
||||
#include <Interpreters/Cache/FileCacheKey.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -12,51 +14,51 @@ class LRUFileCachePriority : public IFileCachePriority
|
||||
{
|
||||
private:
|
||||
class LRUFileCacheIterator;
|
||||
using LRUQueue = std::list<FileCacheRecord>;
|
||||
using LRUQueue = std::list<Entry>;
|
||||
using LRUQueueIterator = typename LRUQueue::iterator;
|
||||
|
||||
public:
|
||||
LRUFileCachePriority() = default;
|
||||
LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {}
|
||||
|
||||
WriteIterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> &) override;
|
||||
size_t getSize(const CacheGuard::Lock &) const override { return current_size; }
|
||||
|
||||
bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> &) override;
|
||||
size_t getElementsCount(const CacheGuard::Lock &) const override { return queue.size(); }
|
||||
|
||||
void removeAll(std::lock_guard<std::mutex> &) override;
|
||||
Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
|
||||
|
||||
ReadIterator getLowestPriorityReadIterator(std::lock_guard<std::mutex> &) override;
|
||||
void pop(const CacheGuard::Lock &) override;
|
||||
|
||||
WriteIterator getLowestPriorityWriteIterator(std::lock_guard<std::mutex> &) override;
|
||||
void removeAll(const CacheGuard::Lock &) override;
|
||||
|
||||
size_t getElementsNum(std::lock_guard<std::mutex> &) const override;
|
||||
void iterate(IterateFunc && func, const CacheGuard::Lock &) override;
|
||||
|
||||
private:
|
||||
LRUQueue queue;
|
||||
Poco::Logger * log = &Poco::Logger::get("LRUFileCachePriority");
|
||||
|
||||
std::atomic<size_t> current_size = 0;
|
||||
|
||||
LRUQueueIterator remove(LRUQueueIterator it);
|
||||
};
|
||||
|
||||
class LRUFileCachePriority::LRUFileCacheIterator : public IFileCachePriority::IIterator
|
||||
{
|
||||
public:
|
||||
LRUFileCacheIterator(LRUFileCachePriority * cache_priority_, LRUFileCachePriority::LRUQueueIterator queue_iter_);
|
||||
LRUFileCacheIterator(
|
||||
LRUFileCachePriority * cache_priority_,
|
||||
LRUFileCachePriority::LRUQueueIterator queue_iter_);
|
||||
|
||||
void next() const override { queue_iter++; }
|
||||
const Entry & getEntry() const override { return *queue_iter; }
|
||||
|
||||
bool valid() const override { return queue_iter != cache_priority->queue.end(); }
|
||||
Entry & getEntry() override { return *queue_iter; }
|
||||
|
||||
const Key & key() const override { return queue_iter->key; }
|
||||
size_t use(const CacheGuard::Lock &) override;
|
||||
|
||||
size_t offset() const override { return queue_iter->offset; }
|
||||
Iterator remove(const CacheGuard::Lock &) override;
|
||||
|
||||
size_t size() const override { return queue_iter->size; }
|
||||
void annul() override;
|
||||
|
||||
size_t hits() const override { return queue_iter->hits; }
|
||||
|
||||
void removeAndGetNext(std::lock_guard<std::mutex> &) override;
|
||||
|
||||
void updateSize(int64_t size, std::lock_guard<std::mutex> &) override;
|
||||
|
||||
void use(std::lock_guard<std::mutex> &) override;
|
||||
void updateSize(int64_t size) override;
|
||||
|
||||
private:
|
||||
LRUFileCachePriority * cache_priority;
|
||||
|
src/Interpreters/Cache/Metadata.cpp (new file, 468 lines)
@ -0,0 +1,468 @@
|
||||
#include <Interpreters/Cache/Metadata.h>
|
||||
#include <Interpreters/Cache/FileCache.h>
|
||||
#include <Interpreters/Cache/FileSegment.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_)
|
||||
: file_segment(std::move(file_segment_))
|
||||
{
|
||||
switch (file_segment->state())
|
||||
{
|
||||
case FileSegment::State::DOWNLOADED:
|
||||
{
|
||||
chassert(file_segment->getQueueIterator());
|
||||
break;
|
||||
}
|
||||
case FileSegment::State::EMPTY:
|
||||
case FileSegment::State::DOWNLOADING:
|
||||
{
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Can create file segment with either EMPTY, DOWNLOADED, DOWNLOADING state, got: {}",
|
||||
FileSegment::stateToString(file_segment->state()));
|
||||
}
|
||||
}
|
||||
|
||||
size_t FileSegmentMetadata::size() const
|
||||
{
|
||||
return file_segment->getReservedSize();
|
||||
}
|
||||
|
||||
KeyMetadata::KeyMetadata(
|
||||
const Key & key_,
|
||||
const std::string & key_path_,
|
||||
CleanupQueue & cleanup_queue_,
|
||||
bool created_base_directory_)
|
||||
: key(key_)
|
||||
, key_path(key_path_)
|
||||
, cleanup_queue(cleanup_queue_)
|
||||
, created_base_directory(created_base_directory_)
|
||||
{
|
||||
if (created_base_directory)
|
||||
chassert(fs::exists(key_path));
|
||||
}
|
||||
|
||||
LockedKeyPtr KeyMetadata::lock()
|
||||
{
|
||||
auto locked = tryLock();
|
||||
if (locked)
|
||||
return locked;
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot lock key {} (state: {})", key, magic_enum::enum_name(key_state));
|
||||
}
|
||||
|
||||
LockedKeyPtr KeyMetadata::tryLock()
|
||||
{
|
||||
auto locked = std::make_unique<LockedKey>(shared_from_this());
|
||||
if (key_state == KeyMetadata::KeyState::ACTIVE)
|
||||
return locked;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool KeyMetadata::createBaseDirectory()
|
||||
{
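/// The exchange(true) below reads and sets the flag in one atomic step, so
/// only the first caller attempts the filesystem call; concurrent callers
/// observe `true` and fall through to `return true`. On failure the flag is
/// rolled back so that a later call can retry the directory creation.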
|
||||
if (!created_base_directory.exchange(true))
|
||||
{
|
||||
try
|
||||
{
|
||||
fs::create_directories(key_path);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// Avoid errors like
|
||||
/// std::__1::__fs::filesystem::filesystem_error: filesystem error: in create_directories: No space left on device
|
||||
/// and mark file segment with SKIP_CACHE state
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
created_base_directory = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string KeyMetadata::getFileSegmentPath(const FileSegment & file_segment)
|
||||
{
|
||||
return fs::path(key_path)
|
||||
/ CacheMetadata::getFileNameForFileSegment(file_segment.offset(), file_segment.getKind());
|
||||
}
|
||||
|
||||
|
||||
class CleanupQueue
{
    friend struct CacheMetadata;
public:
    void add(const FileCacheKey & key);
    void remove(const FileCacheKey & key);
    size_t getSize() const;

private:
    bool tryPop(FileCacheKey & key);

    std::unordered_set<FileCacheKey> keys;
    mutable std::mutex mutex;
};
|
||||
|
||||
|
||||
CacheMetadata::CacheMetadata(const std::string & path_)
|
||||
: path(path_)
|
||||
, cleanup_queue(std::make_unique<CleanupQueue>())
|
||||
, log(&Poco::Logger::get("CacheMetadata"))
|
||||
{
|
||||
}
|
||||
|
||||
String CacheMetadata::getFileNameForFileSegment(size_t offset, FileSegmentKind segment_kind)
|
||||
{
|
||||
String file_suffix;
|
||||
switch (segment_kind)
|
||||
{
|
||||
case FileSegmentKind::Persistent:
|
||||
file_suffix = "_persistent";
|
||||
break;
|
||||
case FileSegmentKind::Temporary:
|
||||
file_suffix = "_temporary";
|
||||
break;
|
||||
case FileSegmentKind::Regular:
|
||||
file_suffix = "";
|
||||
break;
|
||||
}
|
||||
return std::to_string(offset) + file_suffix;
|
||||
}
|
||||
|
||||
String CacheMetadata::getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const
|
||||
{
|
||||
|
||||
const auto key_str = key.toString();
|
||||
return fs::path(path) / key_str.substr(0, 3) / key_str / getFileNameForFileSegment(offset, segment_kind);
|
||||
}
|
||||
|
||||
String CacheMetadata::getPathInLocalCache(const Key & key) const
|
||||
{
|
||||
const auto key_str = key.toString();
|
||||
return fs::path(path) / key_str.substr(0, 3) / key_str;
|
||||
}
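Combining getFileNameForFileSegment() with the two path helpers, the on-disk layout is <base>/<first 3 chars of key>/<key>/<offset><suffix>. A small sketch reproducing it, with a made-up key string in place of a real FileCacheKey:

#include <filesystem>
#include <iostream>
#include <string>

int main()
{
    const std::string base = "/var/lib/clickhouse/cache"; // hypothetical base path
    const std::string key = "abcdef1234567890";           // stands in for key.toString()

    // Prints ".../abc/abcdef1234567890/10" and ".../abc/abcdef1234567890/10_persistent"
    std::cout << (std::filesystem::path(base) / key.substr(0, 3) / key / "10").string() << '\n';
    std::cout << (std::filesystem::path(base) / key.substr(0, 3) / key / "10_persistent").string() << '\n';
}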
|
||||
|
||||
LockedKeyPtr CacheMetadata::lockKeyMetadata(
|
||||
const FileCacheKey & key,
|
||||
KeyNotFoundPolicy key_not_found_policy,
|
||||
bool is_initial_load)
|
||||
{
|
||||
KeyMetadataPtr key_metadata;
|
||||
{
|
||||
auto lock = guard.lock();
|
||||
|
||||
auto it = find(key);
|
||||
if (it == end())
|
||||
{
|
||||
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
|
||||
else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
|
||||
return nullptr;
|
||||
|
||||
it = emplace(
|
||||
key, std::make_shared<KeyMetadata>(
|
||||
key, getPathInLocalCache(key), *cleanup_queue, is_initial_load)).first;
|
||||
}
|
||||
|
||||
key_metadata = it->second;
|
||||
}
|
||||
|
||||
{
|
||||
auto locked_metadata = std::make_unique<LockedKey>(key_metadata);
|
||||
const auto key_state = locked_metadata->getKeyState();
|
||||
|
||||
if (key_state == KeyMetadata::KeyState::ACTIVE)
|
||||
return locked_metadata;
|
||||
|
||||
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
|
||||
|
||||
if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
|
||||
return nullptr;
|
||||
|
||||
if (key_state == KeyMetadata::KeyState::REMOVING)
|
||||
{
|
||||
locked_metadata->removeFromCleanupQueue();
|
||||
return locked_metadata;
|
||||
}
|
||||
|
||||
chassert(key_state == KeyMetadata::KeyState::REMOVED);
|
||||
chassert(key_not_found_policy == KeyNotFoundPolicy::CREATE_EMPTY);
|
||||
}
|
||||
|
||||
/// Now we are in the case:
/// key_state == KeyMetadata::KeyState::REMOVED
/// and KeyNotFoundPolicy == CREATE_EMPTY
/// Retry.
|
||||
return lockKeyMetadata(key, key_not_found_policy);
|
||||
}
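From a caller's point of view the three policies behave as follows (a sketch; the call at the end is illustrative, with names as declared in Metadata.h further down):

// THROW        -> LOGICAL_ERROR if the key is absent or not ACTIVE
// RETURN_NULL  -> nullptr in the same situations
// CREATE_EMPTY -> create a fresh ACTIVE entry; a key caught in REMOVED state
//                 triggers the retry seen above
//
// if (auto locked = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::RETURN_NULL))
//     doSomethingWith(*locked); // hypothetical caller code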
|
||||
|
||||
void CacheMetadata::iterate(IterateCacheMetadataFunc && func)
|
||||
{
|
||||
auto lock = guard.lock();
|
||||
for (const auto & [key, key_metadata] : *this)
|
||||
{
|
||||
auto locked_key = std::make_unique<LockedKey>(key_metadata);
|
||||
const auto key_state = locked_key->getKeyState();
|
||||
|
||||
if (key_state == KeyMetadata::KeyState::ACTIVE)
|
||||
{
|
||||
func(*locked_key);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (key_state == KeyMetadata::KeyState::REMOVING)
|
||||
continue;
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR, "Cannot lock key {}: key does not exist", key_metadata->key);
|
||||
}
|
||||
}
|
||||
|
||||
void CacheMetadata::doCleanup()
|
||||
{
|
||||
auto lock = guard.lock();
|
||||
|
||||
/// Note the following corner case:
/// this metadata cleanup is delayed, so what if we marked a key as deleted and
/// put it into the deletion queue, but then the same key was added to the cache
/// before we actually performed this delayed removal?
/// This works fine, because on each attempt to add any key to the cache
/// we perform this delayed removal first.
|
||||
|
||||
FileCacheKey cleanup_key;
|
||||
while (cleanup_queue->tryPop(cleanup_key))
|
||||
{
|
||||
auto it = find(cleanup_key);
|
||||
if (it == end())
|
||||
continue;
|
||||
|
||||
auto locked_metadata = std::make_unique<LockedKey>(it->second);
|
||||
const auto key_state = locked_metadata->getKeyState();
|
||||
|
||||
if (key_state == KeyMetadata::KeyState::ACTIVE)
|
||||
{
|
||||
/// Key was added back to cache after we submitted it to removal queue.
|
||||
continue;
|
||||
}
|
||||
|
||||
locked_metadata->markAsRemoved();
|
||||
erase(it);
|
||||
|
||||
try
|
||||
{
|
||||
const fs::path key_directory = getPathInLocalCache(cleanup_key);
|
||||
if (fs::exists(key_directory))
|
||||
fs::remove_all(key_directory);
|
||||
|
||||
const fs::path key_prefix_directory = key_directory.parent_path();
|
||||
if (fs::exists(key_prefix_directory) && fs::is_empty(key_prefix_directory))
|
||||
fs::remove_all(key_prefix_directory);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
chassert(false);
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
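The benign race described above plays out like this:

/// T1: last reference to key K is released -> state = REMOVING, K pushed to the cleanup queue
/// T2: a query re-adds K                   -> lockKeyMetadata() sees REMOVING, calls
///                                            removeFromCleanupQueue(), state flips back to ACTIVE
/// T3: doCleanup() eventually pops K       -> sees ACTIVE and skips it (the `continue` above)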
|
||||
|
||||
LockedKey::LockedKey(std::shared_ptr<KeyMetadata> key_metadata_)
|
||||
: key_metadata(key_metadata_)
|
||||
, lock(key_metadata->guard.lock())
|
||||
, log(&Poco::Logger::get("LockedKey"))
|
||||
{
|
||||
}
|
||||
|
||||
LockedKey::~LockedKey()
|
||||
{
|
||||
if (!key_metadata->empty())
|
||||
return;
|
||||
|
||||
key_metadata->key_state = KeyMetadata::KeyState::REMOVING;
|
||||
key_metadata->cleanup_queue.add(getKey());
|
||||
}
|
||||
|
||||
void LockedKey::removeFromCleanupQueue()
|
||||
{
|
||||
if (key_metadata->key_state != KeyMetadata::KeyState::REMOVING)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove non-removing");
|
||||
|
||||
/// Just mark key_state as "not to be removed", the cleanup thread will check it and skip the key.
|
||||
key_metadata->key_state = KeyMetadata::KeyState::ACTIVE;
|
||||
}
|
||||
|
||||
void LockedKey::markAsRemoved()
|
||||
{
|
||||
key_metadata->key_state = KeyMetadata::KeyState::REMOVED;
|
||||
}
|
||||
|
||||
bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const
|
||||
{
|
||||
const auto file_segment_metadata = getByOffset(offset);
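/// A count of 2 presumably means: the reference held by this metadata map
/// entry plus the one held by the caller's last remaining FileSegmentsHolder.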
|
||||
return file_segment_metadata->file_segment.use_count() == 2;
|
||||
}
|
||||
|
||||
void LockedKey::removeAllReleasable()
|
||||
{
|
||||
for (auto it = key_metadata->begin(); it != key_metadata->end();)
|
||||
{
|
||||
if (!it->second->releasable())
|
||||
{
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto file_segment = it->second->file_segment;
|
||||
it = removeFileSegment(file_segment->offset(), file_segment->lock());
|
||||
}
|
||||
}
|
||||
|
||||
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock)
|
||||
{
|
||||
LOG_DEBUG(log, "Remove from cache. Key: {}, offset: {}", getKey(), offset);
|
||||
|
||||
auto it = key_metadata->find(offset);
|
||||
if (it == key_metadata->end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset);
|
||||
|
||||
auto file_segment = it->second->file_segment;
|
||||
if (file_segment->queue_iterator)
|
||||
file_segment->queue_iterator->annul();
|
||||
|
||||
const auto path = key_metadata->getFileSegmentPath(*file_segment);
|
||||
if (fs::exists(path))
|
||||
fs::remove(path);
|
||||
|
||||
file_segment->detach(segment_lock, *this);
|
||||
return key_metadata->erase(it);
|
||||
}
|
||||
|
||||
void LockedKey::shrinkFileSegmentToDownloadedSize(
|
||||
size_t offset,
|
||||
const FileSegmentGuard::Lock & segment_lock)
|
||||
{
|
||||
/**
|
||||
* In case a file was partially downloaded and its download cannot be continued
|
||||
* because of no space left in cache, we need to be able to cut file segment's size to downloaded_size.
|
||||
*/
|
||||
|
||||
auto metadata = getByOffset(offset);
|
||||
const auto & file_segment = metadata->file_segment;
|
||||
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
|
||||
|
||||
const size_t downloaded_size = file_segment->getDownloadedSize(false);
|
||||
if (downloaded_size == file_segment->range().size())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Nothing to reduce, file segment fully downloaded: {}",
|
||||
file_segment->getInfoForLogUnlocked(segment_lock));
|
||||
}
|
||||
|
||||
int64_t diff = file_segment->reserved_size - downloaded_size;
|
||||
|
||||
metadata->file_segment = std::make_shared<FileSegment>(
|
||||
getKey(), offset, downloaded_size, FileSegment::State::DOWNLOADED,
|
||||
CreateFileSegmentSettings(file_segment->getKind()),
|
||||
file_segment->cache, key_metadata, file_segment->queue_iterator);
|
||||
|
||||
if (diff)
|
||||
metadata->getQueueIterator()->updateSize(-diff);
|
||||
|
||||
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
|
||||
}
|
||||
|
||||
std::shared_ptr<const FileSegmentMetadata> LockedKey::getByOffset(size_t offset) const
|
||||
{
|
||||
auto it = key_metadata->find(offset);
|
||||
if (it == key_metadata->end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is not offset {}", offset);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
std::shared_ptr<FileSegmentMetadata> LockedKey::getByOffset(size_t offset)
|
||||
{
|
||||
auto it = key_metadata->find(offset);
|
||||
if (it == key_metadata->end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is not offset {}", offset);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
std::shared_ptr<const FileSegmentMetadata> LockedKey::tryGetByOffset(size_t offset) const
|
||||
{
|
||||
auto it = key_metadata->find(offset);
|
||||
if (it == key_metadata->end())
|
||||
return nullptr;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
std::shared_ptr<FileSegmentMetadata> LockedKey::tryGetByOffset(size_t offset)
|
||||
{
|
||||
auto it = key_metadata->find(offset);
|
||||
if (it == key_metadata->end())
|
||||
return nullptr;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
std::string LockedKey::toString() const
|
||||
{
|
||||
std::string result;
|
||||
for (auto it = key_metadata->begin(); it != key_metadata->end(); ++it)
|
||||
{
|
||||
if (it != key_metadata->begin())
|
||||
result += ", ";
|
||||
result += std::to_string(it->first);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void CleanupQueue::add(const FileCacheKey & key)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
keys.insert(key);
|
||||
}
|
||||
|
||||
void CleanupQueue::remove(const FileCacheKey & key)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
bool erased = keys.erase(key);
|
||||
if (!erased)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key {} in removal queue", key);
|
||||
}
|
||||
|
||||
bool CleanupQueue::tryPop(FileCacheKey & key)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (keys.empty())
|
||||
return false;
|
||||
auto it = keys.begin();
|
||||
key = *it;
|
||||
keys.erase(it);
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t CleanupQueue::getSize() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return keys.size();
|
||||
}
|
||||
|
||||
}
|
src/Interpreters/Cache/Metadata.h (new file, 176 lines)
@ -0,0 +1,176 @@
|
||||
#pragma once
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <Interpreters/Cache/Guards.h>
|
||||
#include <Interpreters/Cache/IFileCachePriority.h>
|
||||
#include <Interpreters/Cache/FileCacheKey.h>
|
||||
#include <Interpreters/Cache/FileSegment.h>
|
||||
#include <Interpreters/Cache/FileCache_fwd_internal.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class CleanupQueue;
|
||||
using CleanupQueuePtr = std::shared_ptr<CleanupQueue>;
|
||||
|
||||
|
||||
struct FileSegmentMetadata : private boost::noncopyable
{
    using Priority = IFileCachePriority;

    explicit FileSegmentMetadata(FileSegmentPtr && file_segment_);

    bool releasable() const { return file_segment.unique(); }

    size_t size() const;

    bool valid() const { return !removal_candidate.load(); }

    Priority::Iterator getQueueIterator() { return file_segment->getQueueIterator(); }

    FileSegmentPtr file_segment;
    std::atomic<bool> removal_candidate{false};
};
|
||||
|
||||
using FileSegmentMetadataPtr = std::shared_ptr<FileSegmentMetadata>;
|
||||
|
||||
|
||||
struct KeyMetadata : public std::map<size_t, FileSegmentMetadataPtr>,
|
||||
private boost::noncopyable,
|
||||
public std::enable_shared_from_this<KeyMetadata>
|
||||
{
|
||||
friend struct LockedKey;
|
||||
using Key = FileCacheKey;
|
||||
|
||||
KeyMetadata(
|
||||
const Key & key_,
|
||||
const std::string & key_path_,
|
||||
CleanupQueue & cleanup_queue_,
|
||||
bool created_base_directory_ = false);
|
||||
|
||||
enum class KeyState
|
||||
{
|
||||
ACTIVE,
|
||||
REMOVING,
|
||||
REMOVED,
|
||||
};
|
||||
|
||||
const Key key;
|
||||
const std::string key_path;
|
||||
|
||||
LockedKeyPtr lock();
|
||||
|
||||
/// Return nullptr if key has non-ACTIVE state.
|
||||
LockedKeyPtr tryLock();
|
||||
|
||||
bool createBaseDirectory();
|
||||
|
||||
std::string getFileSegmentPath(const FileSegment & file_segment);
|
||||
|
||||
private:
|
||||
KeyState key_state = KeyState::ACTIVE;
|
||||
KeyGuard guard;
|
||||
CleanupQueue & cleanup_queue;
|
||||
std::atomic<bool> created_base_directory = false;
|
||||
};
|
||||
|
||||
using KeyMetadataPtr = std::shared_ptr<KeyMetadata>;
|
||||
|
||||
|
||||
struct CacheMetadata : public std::unordered_map<FileCacheKey, KeyMetadataPtr>, private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
using Key = FileCacheKey;
|
||||
using IterateCacheMetadataFunc = std::function<void(const LockedKey &)>;
|
||||
|
||||
explicit CacheMetadata(const std::string & path_);
|
||||
|
||||
const String & getBaseDirectory() const { return path; }
|
||||
|
||||
String getPathInLocalCache(
|
||||
const Key & key,
|
||||
size_t offset,
|
||||
FileSegmentKind segment_kind) const;
|
||||
|
||||
String getPathInLocalCache(const Key & key) const;
|
||||
static String getFileNameForFileSegment(size_t offset, FileSegmentKind segment_kind);
|
||||
|
||||
void iterate(IterateCacheMetadataFunc && func);
|
||||
|
||||
enum class KeyNotFoundPolicy
{
    THROW,
    CREATE_EMPTY,
    RETURN_NULL,
};
|
||||
|
||||
LockedKeyPtr lockKeyMetadata(
|
||||
const Key & key,
|
||||
KeyNotFoundPolicy key_not_found_policy,
|
||||
bool is_initial_load = false);
|
||||
|
||||
void doCleanup();
|
||||
|
||||
private:
|
||||
const std::string path; /// Cache base path
|
||||
CacheMetadataGuard guard;
|
||||
const CleanupQueuePtr cleanup_queue;
|
||||
Poco::Logger * log;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* `LockedKey` is an object which makes sure that as long as it exists the following is true:
|
||||
* 1. the key cannot be removed from cache
|
||||
* (Why: this LockedKey locks key metadata mutex in ctor, unlocks it in dtor, and so
|
||||
* when key is going to be deleted, key mutex is also locked.
|
||||
* Why can it not be the other way round? E.g. could the ctor of LockedKey lock the key
* right after it was deleted? This case is taken into consideration in createLockedKey())
|
||||
* 2. the key cannot be modified, e.g. new offsets cannot be added to key; already existing
|
||||
* offsets cannot be deleted from the key
|
||||
* And also provides some methods which allow the owner of this LockedKey object to do such
|
||||
* modification of the key (adding/deleting offsets) and deleting the key from cache.
|
||||
*/
|
||||
struct LockedKey : private boost::noncopyable
|
||||
{
|
||||
using Key = FileCacheKey;
|
||||
|
||||
explicit LockedKey(std::shared_ptr<KeyMetadata> key_metadata_);
|
||||
|
||||
~LockedKey();
|
||||
|
||||
const Key & getKey() const { return key_metadata->key; }
|
||||
|
||||
auto begin() const { return key_metadata->begin(); }
|
||||
auto end() const { return key_metadata->end(); }
|
||||
|
||||
std::shared_ptr<const FileSegmentMetadata> getByOffset(size_t offset) const;
|
||||
std::shared_ptr<FileSegmentMetadata> getByOffset(size_t offset);
|
||||
|
||||
std::shared_ptr<const FileSegmentMetadata> tryGetByOffset(size_t offset) const;
|
||||
std::shared_ptr<FileSegmentMetadata> tryGetByOffset(size_t offset);
|
||||
|
||||
KeyMetadata::KeyState getKeyState() const { return key_metadata->key_state; }
|
||||
|
||||
std::shared_ptr<const KeyMetadata> getKeyMetadata() const { return key_metadata; }
|
||||
std::shared_ptr<KeyMetadata> getKeyMetadata() { return key_metadata; }
|
||||
|
||||
void removeAllReleasable();
|
||||
|
||||
KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &);
|
||||
|
||||
void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &);
|
||||
|
||||
bool isLastOwnerOfFileSegment(size_t offset) const;
|
||||
|
||||
void removeFromCleanupQueue();
|
||||
|
||||
void markAsRemoved();
|
||||
|
||||
std::string toString() const;
|
||||
|
||||
private:
|
||||
const std::shared_ptr<KeyMetadata> key_metadata;
|
||||
KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`.
|
||||
Poco::Logger * log;
|
||||
};
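A standalone model of the idiom this comment describes, with simplified stand-ins for KeyMetadata and LockedKey (shared ownership pins the object itself, the held lock pins its state):

#include <map>
#include <memory>
#include <mutex>

struct KeyMetadataModel
{
    std::map<size_t, size_t> offsets; // offset -> size, stands in for the real segment map
    std::mutex guard;
};

struct LockedKeyModel
{
    explicit LockedKeyModel(std::shared_ptr<KeyMetadataModel> m)
        : metadata(std::move(m)), lock(metadata->guard) {}

    // While this object is alive: nobody else can acquire `guard` (so the key can
    // be neither removed nor modified), and the shared_ptr keeps the metadata alive.
    std::shared_ptr<KeyMetadataModel> metadata;
    std::unique_lock<std::mutex> lock; // declared last so it unlocks before `metadata` is released
};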
|
||||
|
||||
}
|
src/Interpreters/Cache/QueryLimit.cpp (new file, 112 lines)
@ -0,0 +1,112 @@
|
||||
#include <Interpreters/Cache/QueryLimit.h>
|
||||
#include <Interpreters/Cache/Metadata.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
static bool isQueryInitialized()
|
||||
{
|
||||
return CurrentThread::isInitialized()
|
||||
&& CurrentThread::get().getQueryContext()
|
||||
&& !CurrentThread::getQueryId().empty();
|
||||
}
|
||||
|
||||
FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(const CacheGuard::Lock &)
|
||||
{
|
||||
if (!isQueryInitialized())
|
||||
return nullptr;
|
||||
|
||||
auto query_iter = query_map.find(std::string(CurrentThread::getQueryId()));
|
||||
return (query_iter == query_map.end()) ? nullptr : query_iter->second;
|
||||
}
|
||||
|
||||
void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const CacheGuard::Lock &)
|
||||
{
|
||||
auto query_iter = query_map.find(query_id);
|
||||
if (query_iter == query_map.end())
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Attempt to release query context that does not exist (query_id: {})",
|
||||
query_id);
|
||||
}
|
||||
query_map.erase(query_iter);
|
||||
}
|
||||
|
||||
FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::getOrSetQueryContext(
|
||||
const std::string & query_id,
|
||||
const ReadSettings & settings,
|
||||
const CacheGuard::Lock &)
|
||||
{
|
||||
if (query_id.empty())
|
||||
return nullptr;
|
||||
|
||||
auto [it, inserted] = query_map.emplace(query_id, nullptr);
|
||||
if (inserted)
|
||||
{
|
||||
it->second = std::make_shared<QueryContext>(
|
||||
settings.filesystem_cache_max_download_size,
|
||||
!settings.skip_download_if_exceeds_query_cache);
|
||||
}
|
||||
|
||||
return it->second;
|
||||
}
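The emplace-then-fill idiom above avoids a double lookup: a single emplace either finds the existing context or reserves the slot, which is then filled in. The same idiom in isolation, with a simplified context type:

#include <cstddef>
#include <memory>
#include <string>
#include <unordered_map>

struct QueryContextSketch
{
    size_t max_size;
    bool recache_on_limit_exceeded;
};

using QueryMapSketch = std::unordered_map<std::string, std::shared_ptr<QueryContextSketch>>;

// One hash lookup: emplace() either finds the existing context or inserts an
// empty slot, filled in before anyone else can observe it (the real code runs
// under CacheGuard::Lock).
inline std::shared_ptr<QueryContextSketch> getOrSetSketch(
    QueryMapSketch & map, const std::string & query_id, size_t max_size, bool recache)
{
    if (query_id.empty())
        return nullptr;
    auto [it, inserted] = map.emplace(query_id, nullptr);
    if (inserted)
        it->second = std::make_shared<QueryContextSketch>(QueryContextSketch{max_size, recache});
    return it->second;
}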
|
||||
|
||||
FileCacheQueryLimit::QueryContext::QueryContext(
|
||||
size_t query_cache_size,
|
||||
bool recache_on_query_limit_exceeded_)
|
||||
: priority(LRUFileCachePriority(query_cache_size, 0))
|
||||
, recache_on_query_limit_exceeded(recache_on_query_limit_exceeded_)
|
||||
{
|
||||
}
|
||||
|
||||
void FileCacheQueryLimit::QueryContext::add(
|
||||
const FileSegment & file_segment,
|
||||
const CacheGuard::Lock & lock)
|
||||
{
|
||||
const auto key = file_segment.key();
|
||||
const auto offset = file_segment.offset();
|
||||
|
||||
auto it = getPriority().add(
|
||||
file_segment.getKeyMetadata(), offset, file_segment.range().size(), lock);
|
||||
|
||||
auto [_, inserted] = records.emplace(FileCacheKeyAndOffset{key, offset}, it);
|
||||
if (!inserted)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot add offset {} to query context under key {}, it already exists",
|
||||
offset, key);
|
||||
}
|
||||
}
|
||||
|
||||
void FileCacheQueryLimit::QueryContext::remove(
|
||||
const Key & key,
|
||||
size_t offset,
|
||||
const CacheGuard::Lock & lock)
|
||||
{
|
||||
auto record = records.find({key, offset});
|
||||
if (record == records.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no {}:{} in query context", key, offset);
|
||||
|
||||
record->second->remove(lock);
|
||||
records.erase({key, offset});
|
||||
}
|
||||
|
||||
IFileCachePriority::Iterator FileCacheQueryLimit::QueryContext::tryGet(
|
||||
const Key & key,
|
||||
size_t offset,
|
||||
const CacheGuard::Lock &)
|
||||
{
|
||||
auto it = records.find({key, offset});
|
||||
if (it == records.end())
|
||||
return nullptr;
|
||||
return it->second;
|
||||
|
||||
}
|
||||
|
||||
}
|
src/Interpreters/Cache/QueryLimit.h (new file, 67 lines)
@ -0,0 +1,67 @@
|
||||
#pragma once
|
||||
#include <Interpreters/Cache/Guards.h>
|
||||
#include <Interpreters/Cache/LRUFileCachePriority.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct ReadSettings;
|
||||
class FileSegment;
|
||||
|
||||
class FileCacheQueryLimit
|
||||
{
|
||||
public:
|
||||
class QueryContext;
|
||||
using QueryContextPtr = std::shared_ptr<QueryContext>;
|
||||
|
||||
QueryContextPtr tryGetQueryContext(const CacheGuard::Lock & lock);
|
||||
|
||||
QueryContextPtr getOrSetQueryContext(
|
||||
const std::string & query_id,
|
||||
const ReadSettings & settings,
|
||||
const CacheGuard::Lock &);
|
||||
|
||||
void removeQueryContext(const std::string & query_id, const CacheGuard::Lock &);
|
||||
|
||||
class QueryContext
|
||||
{
|
||||
public:
|
||||
using Key = FileCacheKey;
|
||||
using Priority = IFileCachePriority;
|
||||
using PriorityIterator = IFileCachePriority::Iterator;
|
||||
|
||||
QueryContext(size_t query_cache_size, bool recache_on_query_limit_exceeded_);
|
||||
|
||||
Priority & getPriority() { return priority; }
|
||||
const Priority & getPriority() const { return priority; }
|
||||
|
||||
bool recacheOnFileCacheQueryLimitExceeded() const { return recache_on_query_limit_exceeded; }
|
||||
|
||||
IFileCachePriority::Iterator tryGet(
|
||||
const Key & key,
|
||||
size_t offset,
|
||||
const CacheGuard::Lock &);
|
||||
|
||||
void add(
|
||||
const FileSegment & file_segment,
|
||||
const CacheGuard::Lock &);
|
||||
|
||||
void remove(
|
||||
const Key & key,
|
||||
size_t offset,
|
||||
const CacheGuard::Lock &);
|
||||
|
||||
private:
|
||||
using Records = std::unordered_map<FileCacheKeyAndOffset, IFileCachePriority::Iterator, FileCacheKeyAndOffsetHash>;
|
||||
Records records;
|
||||
LRUFileCachePriority priority;
|
||||
const bool recache_on_query_limit_exceeded;
|
||||
};
|
||||
|
||||
private:
|
||||
using QueryContextMap = std::unordered_map<String, QueryContextPtr>;
|
||||
QueryContextMap query_map;
|
||||
};
|
||||
|
||||
using FileCacheQueryLimitPtr = std::unique_ptr<FileCacheQueryLimit>;
|
||||
|
||||
}
|
@ -17,17 +17,17 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_)
|
||||
: WriteBufferFromFileDecorator(file_segment_->detachWriter())
|
||||
: WriteBufferFromFileDecorator(std::make_unique<WriteBufferFromFile>(file_segment_->getPathInLocalCache()))
|
||||
, file_segment(file_segment_)
|
||||
{
|
||||
}
|
||||
|
||||
WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolder && segment_holder_)
|
||||
WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder_)
|
||||
: WriteBufferFromFileDecorator(
|
||||
segment_holder_.file_segments.size() == 1
|
||||
? segment_holder_.file_segments.front()->detachWriter()
|
||||
segment_holder_->size() == 1
|
||||
? std::make_unique<WriteBufferFromFile>(segment_holder_->front().getPathInLocalCache())
|
||||
: throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment"))
|
||||
, file_segment(segment_holder_.file_segments.front().get())
|
||||
, file_segment(&segment_holder_->front())
|
||||
, segment_holder(std::move(segment_holder_))
|
||||
{
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ class WriteBufferToFileSegment : public WriteBufferFromFileDecorator, public IRe
|
||||
{
|
||||
public:
|
||||
explicit WriteBufferToFileSegment(FileSegment * file_segment_);
|
||||
explicit WriteBufferToFileSegment(FileSegmentsHolder && segment_holder);
|
||||
explicit WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder);
|
||||
|
||||
void nextImpl() override;
|
||||
|
||||
@ -28,7 +28,7 @@ private:
|
||||
FileSegment * file_segment;
|
||||
|
||||
/// Empty if file_segment is not owned by this WriteBufferToFileSegment
|
||||
FileSegmentsHolder segment_holder;
|
||||
FileSegmentsHolderPtr segment_holder;
|
||||
};
|
||||
|
||||
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include <Coordination/KeeperDispatcher.h>
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Storages/IStorage.h>
|
||||
@ -43,6 +42,9 @@
|
||||
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
|
||||
#include <Interpreters/TemporaryDataOnDisk.h>
|
||||
#include <Interpreters/Cache/QueryCache.h>
|
||||
#include <Interpreters/Cache/FileCacheFactory.h>
|
||||
#include <Interpreters/Cache/FileCache.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
#include <Interpreters/PreparedSets.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/SettingsQuirks.h>
|
||||
@ -107,15 +109,12 @@
|
||||
#include <Interpreters/Lemmatizers.h>
|
||||
#include <Interpreters/ClusterDiscovery.h>
|
||||
#include <Interpreters/TransactionLog.h>
|
||||
#include <Interpreters/Cache/FileCacheFactory.h>
|
||||
#include <filesystem>
|
||||
#include <re2/re2.h>
|
||||
#include <Storages/StorageView.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
#include <Interpreters/Cache/FileCache.h>
|
||||
|
||||
#if USE_ROCKSDB
|
||||
#include <rocksdb/table.h>
|
||||
#endif
|
||||
@ -536,6 +535,12 @@ struct ContextSharedPart : boost::noncopyable
|
||||
/// take it as well, which will cause deadlock.
|
||||
delete_ddl_worker.reset();
|
||||
|
||||
/// Background operations in cache use background schedule pool.
|
||||
/// Deactivate them before destructing it.
|
||||
const auto & caches = FileCacheFactory::instance().getAll();
|
||||
for (const auto & [_, cache] : caches)
|
||||
cache->cache->deactivateBackgroundOperations();
|
||||
|
||||
{
|
||||
auto lock = std::lock_guard(mutex);
|
||||
|
||||
|
@ -20,7 +20,7 @@ static Block getSampleBlock()
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "max_elements"},
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "max_file_segment_size"},
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeNumber<UInt8>>(), "cache_on_write_operations"},
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeNumber<UInt8>>(), "enable_cache_hits_threshold"},
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeNumber<UInt8>>(), "cache_hits_threshold"},
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "current_size"},
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "current_elements"},
|
||||
ColumnWithTypeAndName{std::make_shared<DataTypeString>(), "path"},
|
||||
@ -45,7 +45,7 @@ BlockIO InterpreterDescribeCacheQuery::execute()
|
||||
res_columns[1]->insert(settings.max_elements);
|
||||
res_columns[2]->insert(settings.max_file_segment_size);
|
||||
res_columns[3]->insert(settings.cache_on_write_operations);
|
||||
res_columns[4]->insert(settings.enable_cache_hits_threshold);
|
||||
res_columns[4]->insert(settings.cache_hits_threshold);
|
||||
res_columns[5]->insert(cache->getUsedCacheSize());
|
||||
res_columns[6]->insert(cache->getFileSegmentsNum());
|
||||
res_columns[7]->insert(cache->getBasePath());
|
||||
|
@ -364,12 +364,12 @@ BlockIO InterpreterSystemQuery::execute()
|
||||
{
|
||||
auto caches = FileCacheFactory::instance().getAll();
|
||||
for (const auto & [_, cache_data] : caches)
|
||||
cache_data->cache->removeIfReleasable();
|
||||
cache_data->cache->removeAllReleasable();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache;
|
||||
cache->removeIfReleasable();
|
||||
cache->removeAllReleasable();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -92,12 +92,15 @@ TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, si
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache and no volume");
|
||||
}
|
||||
|
||||
FileSegmentsHolder TemporaryDataOnDisk::createCacheFile(size_t max_file_size)
|
||||
FileSegmentsHolderPtr TemporaryDataOnDisk::createCacheFile(size_t max_file_size)
|
||||
{
|
||||
if (!file_cache)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache");
|
||||
|
||||
return file_cache->set(FileSegment::Key::random(), 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true));
|
||||
const auto key = FileSegment::Key::random();
|
||||
auto holder = file_cache->set(key, 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true));
|
||||
fs::create_directories(file_cache->getPathInLocalCache(key));
|
||||
return holder;
|
||||
}
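The std::max clamp means a spill file always reserves at least 10 MiB of cache space even if the caller asks for less; in isolation (assuming 10_MiB is the usual ClickHouse literal for 10 * 1024 * 1024):

#include <algorithm>
#include <cstddef>

constexpr size_t MiB = 1024 * 1024;

// Never reserve less than 10 MiB for a temporary spill file.
inline size_t spillReservation(size_t max_file_size)
{
    return std::max<size_t>(10 * MiB, max_file_size);
}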
|
||||
|
||||
TemporaryFileOnDiskHolder TemporaryDataOnDisk::createRegularFile(size_t max_file_size)
|
||||
@ -237,15 +240,14 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const
|
||||
LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getPath());
|
||||
}
|
||||
|
||||
TemporaryFileStream::TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_)
|
||||
TemporaryFileStream::TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_)
|
||||
: parent(parent_)
|
||||
, header(header_)
|
||||
, segment_holder(std::move(segments_))
|
||||
{
|
||||
if (segment_holder.file_segments.size() != 1)
|
||||
if (segment_holder->size() != 1)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream can be created only from single segment");
|
||||
auto & segment = segment_holder.file_segments.front();
|
||||
auto out_buf = std::make_unique<WriteBufferToFileSegment>(segment.get());
|
||||
auto out_buf = std::make_unique<WriteBufferToFileSegment>(&segment_holder->front());
|
||||
|
||||
LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", out_buf->getFileName());
|
||||
out_writer = std::make_unique<OutputWriter>(std::move(out_buf), header);
|
||||
@ -336,7 +338,7 @@ void TemporaryFileStream::updateAllocAndCheck()
|
||||
|
||||
bool TemporaryFileStream::isEof() const
|
||||
{
|
||||
return file == nullptr && segment_holder.empty();
|
||||
return file == nullptr && !segment_holder;
|
||||
}
|
||||
|
||||
void TemporaryFileStream::release()
|
||||
@ -356,7 +358,7 @@ void TemporaryFileStream::release()
|
||||
parent->deltaAllocAndCheck(-stat.compressed_size, -stat.uncompressed_size);
|
||||
}
|
||||
|
||||
if (!segment_holder.empty())
|
||||
if (segment_holder)
|
||||
segment_holder.reset();
|
||||
}
|
||||
|
||||
@ -364,8 +366,8 @@ String TemporaryFileStream::getPath() const
|
||||
{
|
||||
if (file)
|
||||
return file->getPath();
|
||||
if (!segment_holder.file_segments.empty())
|
||||
return segment_holder.file_segments.front()->getPathInLocalCache();
|
||||
if (segment_holder && !segment_holder->empty())
|
||||
return segment_holder->front().getPathInLocalCache();
|
||||
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream has no file");
|
||||
}
|
||||
|
@ -103,7 +103,7 @@ public:
|
||||
const StatAtomic & getStat() const { return stat; }
|
||||
|
||||
private:
|
||||
FileSegmentsHolder createCacheFile(size_t max_file_size);
|
||||
FileSegmentsHolderPtr createCacheFile(size_t max_file_size);
|
||||
TemporaryFileOnDiskHolder createRegularFile(size_t max_file_size);
|
||||
|
||||
mutable std::mutex mutex;
|
||||
@ -130,7 +130,7 @@ public:
|
||||
};
|
||||
|
||||
TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_);
|
||||
TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_);
|
||||
TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_);
|
||||
|
||||
size_t write(const Block & block);
|
||||
void flush();
|
||||
@ -161,7 +161,7 @@ private:
|
||||
|
||||
/// Data can be stored in file directly or in the cache
|
||||
TemporaryFileOnDiskHolder file;
|
||||
FileSegmentsHolder segment_holder;
|
||||
FileSegmentsHolderPtr segment_holder;
|
||||
|
||||
Stat stat;
|
||||
|
||||
|
@ -16,6 +16,9 @@
|
||||
#include <filesystem>
|
||||
#include <thread>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
#include <Poco/DOM/DOMParser.h>
|
||||
#include <base/sleep.h>
|
||||
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Disks/IO/CachedOnDiskWriteBufferFromFile.h>
|
||||
@ -26,21 +29,6 @@ using namespace DB;
|
||||
|
||||
static constexpr auto TEST_LOG_LEVEL = "debug";
|
||||
|
||||
void assertRange(
|
||||
[[maybe_unused]] size_t assert_n, DB::FileSegmentPtr file_segment,
|
||||
const DB::FileSegment::Range & expected_range, DB::FileSegment::State expected_state)
|
||||
{
|
||||
auto range = file_segment->range();
|
||||
|
||||
std::cerr << fmt::format("\nAssert #{} : {} == {} (state: {} == {})\n", assert_n,
|
||||
range.toString(), expected_range.toString(),
|
||||
toString(file_segment->state()), toString(expected_state));
|
||||
|
||||
ASSERT_EQ(range.left, expected_range.left);
|
||||
ASSERT_EQ(range.right, expected_range.right);
|
||||
ASSERT_EQ(file_segment->state(), expected_state);
|
||||
}
|
||||
|
||||
void printRanges(const auto & segments)
|
||||
{
|
||||
std::cerr << "\nHaving file segments: ";
|
||||
@ -48,21 +36,16 @@ void printRanges(const auto & segments)
|
||||
std::cerr << '\n' << segment->range().toString() << " (state: " + DB::FileSegment::stateToString(segment->state()) + ")" << "\n";
|
||||
}
|
||||
|
||||
std::vector<DB::FileSegmentPtr> fromHolder(const DB::FileSegmentsHolder & holder)
|
||||
{
|
||||
return std::vector<DB::FileSegmentPtr>(holder.file_segments.begin(), holder.file_segments.end());
|
||||
}
|
||||
|
||||
String getFileSegmentPath(const String & base_path, const DB::FileCache::Key & key, size_t offset)
|
||||
{
|
||||
auto key_str = key.toString();
|
||||
return fs::path(base_path) / key_str.substr(0, 3) / key_str / DB::toString(offset);
|
||||
}
|
||||
|
||||
void download(const std::string & cache_base_path, DB::FileSegmentPtr file_segment)
|
||||
void download(const std::string & cache_base_path, DB::FileSegment & file_segment)
|
||||
{
|
||||
const auto & key = file_segment->key();
|
||||
size_t size = file_segment->range().size();
|
||||
const auto & key = file_segment.key();
|
||||
size_t size = file_segment.range().size();
|
||||
|
||||
auto key_str = key.toString();
|
||||
auto subdir = fs::path(cache_base_path) / key_str.substr(0, 3) / key_str;
|
||||
@ -70,29 +53,94 @@ void download(const std::string & cache_base_path, DB::FileSegmentPtr file_segme
|
||||
fs::create_directories(subdir);
|
||||
|
||||
std::string data(size, '0');
|
||||
file_segment->write(data.data(), size, file_segment->getCurrentWriteOffset());
|
||||
file_segment.write(data.data(), size, file_segment.getCurrentWriteOffset(false));
|
||||
}
|
||||
|
||||
void prepareAndDownload(const std::string & cache_base_path, DB::FileSegmentPtr file_segment)
|
||||
{
|
||||
ASSERT_TRUE(file_segment->reserve(file_segment->range().size()));
|
||||
download(cache_base_path, file_segment);
|
||||
}
|
||||
using Range = FileSegment::Range;
|
||||
using Ranges = std::vector<Range>;
|
||||
using State = FileSegment::State;
|
||||
using States = std::vector<State>;
|
||||
using Holder = FileSegmentsHolder;
|
||||
using HolderPtr = FileSegmentsHolderPtr;
|
||||
|
||||
void complete(const std::string & cache_base_path, const DB::FileSegmentsHolder & holder)
|
||||
fs::path caches_dir = fs::current_path() / "lru_cache_test";
|
||||
std::string cache_base_path = caches_dir / "cache1" / "";
|
||||
|
||||
|
||||
void assertEqual(const HolderPtr & holder, const Ranges & expected_ranges, const States & expected_states = {})
|
||||
{
|
||||
for (const auto & file_segment : holder.file_segments)
|
||||
std::cerr << "Holder: " << holder->toString() << "\n";
|
||||
ASSERT_EQ(holder->size(), expected_ranges.size());
|
||||
|
||||
if (!expected_states.empty())
|
||||
ASSERT_EQ(holder->size(), expected_states.size());
|
||||
|
||||
auto get_expected_state = [&](size_t i)
|
||||
{
|
||||
ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId());
|
||||
prepareAndDownload(cache_base_path, file_segment);
|
||||
file_segment->completeWithoutState();
|
||||
if (expected_states.empty())
|
||||
return State::DOWNLOADED;
|
||||
else
|
||||
return expected_states[i];
|
||||
};
|
||||
|
||||
size_t i = 0;
|
||||
for (const auto & file_segment : *holder)
|
||||
{
|
||||
ASSERT_EQ(file_segment->range(), expected_ranges[i]);
|
||||
ASSERT_EQ(file_segment->state(), get_expected_state(i));
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
FileSegment & get(const HolderPtr & holder, int i)
|
||||
{
|
||||
auto it = std::next(holder->begin(), i);
|
||||
if (it == holder->end())
|
||||
std::terminate();
|
||||
return **it;
|
||||
}
|
||||
|
||||
void download(FileSegment & file_segment)
|
||||
{
|
||||
std::cerr << "Downloading range " << file_segment.range().toString() << "\n";
|
||||
|
||||
ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId());
|
||||
ASSERT_EQ(file_segment.state(), State::DOWNLOADING);
|
||||
ASSERT_EQ(file_segment.getDownloadedSize(false), 0);
|
||||
|
||||
ASSERT_TRUE(file_segment.reserve(file_segment.range().size()));
|
||||
download(cache_base_path, file_segment);
|
||||
ASSERT_EQ(file_segment.state(), State::DOWNLOADING);
|
||||
|
||||
file_segment.complete();
|
||||
ASSERT_EQ(file_segment.state(), State::DOWNLOADED);
|
||||
}
|
||||
|
||||
void assertDownloadFails(FileSegment & file_segment)
|
||||
{
|
||||
ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId());
|
||||
ASSERT_EQ(file_segment.getDownloadedSize(false), 0);
|
||||
ASSERT_FALSE(file_segment.reserve(file_segment.range().size()));
|
||||
file_segment.complete();
|
||||
}
|
||||
|
||||
void download(const HolderPtr & holder)
|
||||
{
|
||||
for (auto & it : *holder)
|
||||
{
|
||||
download(*it);
|
||||
}
|
||||
}
|
||||
|
||||
void increasePriority(const HolderPtr & holder)
|
||||
{
|
||||
for (auto & it : *holder)
|
||||
it->use();
|
||||
}
|
||||
|
||||
class FileCacheTest : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
|
||||
static void setupLogs(const std::string & level)
|
||||
{
|
||||
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
|
||||
@ -118,8 +166,6 @@ public:
|
||||
fs::remove_all(cache_base_path);
|
||||
}
|
||||
|
||||
fs::path caches_dir = fs::current_path() / "lru_cache_test";
|
||||
std::string cache_base_path = caches_dir / "cache1" / "";
|
||||
};
|
||||
|
||||
TEST_F(FileCacheTest, get)
|
||||
@ -128,6 +174,14 @@ TEST_F(FileCacheTest, get)
|
||||
|
||||
/// To work with cache need query_id and query context.
|
||||
std::string query_id = "query_id";
|
||||
|
||||
Poco::XML::DOMParser dom_parser;
|
||||
std::string xml(R"CONFIG(<clickhouse>
|
||||
</clickhouse>)CONFIG");
|
||||
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
|
||||
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
|
||||
getMutableContext().context->setConfig(config);
|
||||
|
||||
auto query_context = DB::Context::createCopy(getContext().context);
|
||||
query_context->makeQueryContext();
|
||||
query_context->setCurrentQueryId(query_id);
|
||||
@ -140,255 +194,249 @@ TEST_F(FileCacheTest, get)
|
||||
settings.max_elements = 5;
|
||||
|
||||
{
|
||||
std::cerr << "Step 1\n";
|
||||
auto cache = DB::FileCache(settings);
|
||||
cache.initialize();
|
||||
auto key = cache.hash("key1");
|
||||
auto key = cache.createKeyForPath("key1");
|
||||
|
||||
{
|
||||
auto holder = cache.getOrSet(key, 0, 10, {}); /// Add range [0, 9]
|
||||
auto segments = fromHolder(holder);
|
||||
/// Range was not present in cache. It should be added in cache as one whole file segment.
|
||||
ASSERT_EQ(segments.size(), 1);
|
||||
|
||||
assertRange(1, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::EMPTY);
|
||||
|
||||
/// Exception because space not reserved.
|
||||
/// EXPECT_THROW(download(segments[0]), DB::Exception);
|
||||
/// Exception because space can be reserved only by downloader
|
||||
/// EXPECT_THROW(segments[0]->reserve(segments[0]->range().size()), DB::Exception);
|
||||
|
||||
ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
|
||||
ASSERT_TRUE(segments[0]->reserve(segments[0]->range().size()));
|
||||
assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING);
|
||||
|
||||
download(cache_base_path, segments[0]);
|
||||
segments[0]->completeWithoutState();
|
||||
assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
|
||||
assertEqual(holder, { Range(0, 9) }, { State::EMPTY });
|
||||
download(holder->front());
|
||||
assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
|
||||
increasePriority(holder);
|
||||
}
|
||||
|
||||
/// Current cache: [__________]
|
||||
/// ^ ^
|
||||
/// 0 9
|
||||
assertEqual(cache.getSnapshot(key), { Range(0, 9) });
|
||||
assertEqual(cache.dumpQueue(), { Range(0, 9) });
|
||||
ASSERT_EQ(cache.getFileSegmentsNum(), 1);
|
||||
ASSERT_EQ(cache.getUsedCacheSize(), 10);
|
||||
|
||||
std::cerr << "Step 2\n";

{
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
auto holder = cache.getOrSet(key, 5, 10, {});
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 2);

assertRange(4, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
assertRange(5, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::EMPTY);

ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[1]);
segments[1]->completeWithoutState();
assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::EMPTY });
download(get(holder, 1));
assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::DOWNLOADED });
increasePriority(holder);
}

/// Current cache: [__________][_____]
///                ^          ^^     ^
///                0          910    14
assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
ASSERT_EQ(cache.getUsedCacheSize(), 15);

std::cerr << "Step 3\n";

/// Get [9, 9]
{
auto holder = cache.getOrSet(key, 9, 1, {}); /// Get [9, 9]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 1);
assertRange(7, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
auto holder = cache.getOrSet(key, 9, 1, {});
assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
increasePriority(holder);
}

assertEqual(cache.dumpQueue(), { Range(10, 14), Range(0, 9) });
/// Get [9, 10]
assertEqual(cache.getOrSet(key, 9, 2, {}),
{ Range(0, 9), Range(10, 14) },
{ State::DOWNLOADED, State::DOWNLOADED });

/// Get [10, 10]
{
auto holder = cache.getOrSet(key, 10, 1, {});
assertEqual(holder, { Range(10, 14) }, { State::DOWNLOADED });
increasePriority(holder);
}

assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
ASSERT_EQ(cache.getUsedCacheSize(), 15);

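/// Note the queue order in Step 3: in these tests dumpQueue() lists segments from
/// coldest (the next eviction candidate, at the front) to hottest, and
/// increasePriority(holder) moves the holder's segments to the hot end. Condensed:
///
///     auto holder = cache.getOrSet(key, 9, 1, {});   /// touch [0, 9]
///     increasePriority(holder);
///     assertEqual(cache.dumpQueue(), { Range(10, 14), Range(0, 9) });
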
std::cerr << "Step 4\n";

{
auto holder = cache.getOrSet(key, 17, 4, {});
download(holder); /// Get [17, 20]
increasePriority(holder);
}

{
auto holder = cache.getOrSet(key, 9, 2, {}); /// Get [9, 10]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 2);
assertRange(8, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
assertRange(9, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
auto holder = cache.getOrSet(key, 24, 3, {});
download(holder); /// Get [24, 26]
increasePriority(holder);
}

{
auto holder = cache.getOrSet(key, 10, 1, {}); /// Get [10, 10]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 1);
assertRange(10, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
auto holder = cache.getOrSet(key, 27, 1, {});
download(holder); /// Get [27, 27]
increasePriority(holder);
}

complete(cache_base_path, cache.getOrSet(key, 17, 4, {})); /// Get [17, 20]
complete(cache_base_path, cache.getOrSet(key, 24, 3, {})); /// Get [24, 26]
/// completeWithState(cache.getOrSet(key, 27, 1, false)); /// Get [27, 27]

/// Current cache: [__________][_____]  [____]   [___][]
///                ^          ^^     ^  ^    ^   ^   ^^^
///                0          910    14 17   20  24  2627
///
ASSERT_EQ(cache.getFileSegmentsNum(), 4);
ASSERT_EQ(cache.getUsedCacheSize(), 22);
assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(17, 20), Range(24, 26), Range(27, 27) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14), Range(17, 20), Range(24, 26), Range(27, 27) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 23);

std::cerr << "Step 5\n";
{
auto holder = cache.getOrSet(key, 0, 26, {}); /// Get [0, 25]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 6);

assertRange(11, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
assertRange(12, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);

/// Missing [15, 16] should be added in cache.
assertRange(13, segments[2], DB::FileSegment::Range(15, 16), DB::FileSegment::State::EMPTY);

ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[2]);

segments[2]->completeWithoutState();

assertRange(14, segments[3], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED);

/// New [21, 23], but will not be added in cache because of elements limit (5)
assertRange(15, segments[4], DB::FileSegment::Range(21, 23), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[4]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_FALSE(segments[4]->reserve(1));

assertRange(16, segments[5], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);

/// Current cache: [__________][_____][ ][____]    [___]
///                ^                          ^    ^
///                0                          20   24
///
assertEqual(holder,
{ Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
download(get(holder, 2)); /// [27, 27] was evicted.
assertEqual(holder,
{ Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
assertDownloadFails(get(holder, 4));
assertEqual(holder,
{ Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DETACHED, State::DOWNLOADED });

/// Range [27, 27] must have been evicted by the previous getOrSet [0, 25].
/// Keep the pointers to the segments returned for range [0, 25] valid; since the
/// max number of elements is reached, the next attempt to put something in the cache should fail.
/// This will also check that [27, 27] was indeed evicted.
auto holder2 = cache.getOrSet(key, 27, 1, {});
assertEqual(holder2, { Range(27, 27) }, { State::EMPTY });
assertDownloadFails(holder2->front());
assertEqual(holder2, { Range(27, 27) }, { State::DETACHED });

auto holder1 = cache.getOrSet(key, 27, 1, {});
auto segments_1 = fromHolder(holder1); /// Get [27, 27]
ASSERT_EQ(segments_1.size(), 1);
assertRange(17, segments_1[0], DB::FileSegment::Range(27, 27), DB::FileSegment::State::EMPTY);
auto holder3 = cache.getOrSet(key, 28, 3, {});
assertEqual(holder3, { Range(28, 30) }, { State::EMPTY });
assertDownloadFails(holder3->front());
assertEqual(holder3, { Range(28, 30) }, { State::DETACHED });

increasePriority(holder);
increasePriority(holder2);
increasePriority(holder3);
}

/// Current cache: [__________][_____][ ][____]    [___]
///                ^                          ^    ^
///                0                          20   24
///
assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(24, 26) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(24, 26) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 24);

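/// In short: with max_elements = 5, bringing in a 6th segment forces LRU eviction
/// of a releasable segment ([27, 27] above), and once nothing else is releasable,
/// reservation simply fails. A condensed sketch using this test's helpers:
///
///     auto holder = cache.getOrSet(key, 21, 3, {});   /// would be a 6th element
///     auto & segment = holder->front();
///     if (segment.getOrSetDownloader() == DB::FileSegment::getCallerId())
///         ASSERT_FALSE(segment.reserve(1));           /// element limit hit, nothing evictable
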
std::cerr << "Step 6\n";

{
auto holder = cache.getOrSet(key, 12, 10, {}); /// Get [12, 21]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 4);

assertRange(18, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
assertRange(19, segments[1], DB::FileSegment::Range(15, 16), DB::FileSegment::State::DOWNLOADED);
assertRange(20, segments[2], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED);

assertRange(21, segments[3], DB::FileSegment::Range(21, 21), DB::FileSegment::State::EMPTY);

ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[3]);

segments[3]->completeWithoutState();
ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED);
assertEqual(holder,
{ Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });
download(get(holder, 3));
assertEqual(holder,
{ Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED });
increasePriority(holder);
}

/// Current cache: [_____][__][____][_]  [___]
///                ^          ^     ^    ^   ^
///                10         17    21   24  26

assertEqual(cache.getSnapshot(key), { Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21), Range(24, 26) });
assertEqual(cache.dumpQueue(), { Range(24, 26), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 15);

std::cerr << "Step 7\n";
{
auto holder = cache.getOrSet(key, 23, 5, {}); /// Get [23, 27]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 3);

assertRange(22, segments[0], DB::FileSegment::Range(23, 23), DB::FileSegment::State::EMPTY);
assertRange(23, segments[1], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(24, segments[2], DB::FileSegment::Range(27, 27), DB::FileSegment::State::EMPTY);

ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[0]);
prepareAndDownload(cache_base_path, segments[2]);
segments[0]->completeWithoutState();
segments[2]->completeWithoutState();
auto holder = cache.getOrSet(key, 23, 5, {}); /// Get [23, 27]
assertEqual(holder,
{ Range(23, 23), Range(24, 26), Range(27, 27) },
{ State::EMPTY, State::DOWNLOADED, State::EMPTY });
download(get(holder, 0));
download(get(holder, 2));
increasePriority(holder);
}

/// Current cache: [____][_]  [][___][__]
///                ^     ^    ^^^   ^^  ^
///                17    21   2324  26  28

{
auto holder5 = cache.getOrSet(key, 2, 3, {}); /// Get [2, 4]
auto s5 = fromHolder(holder5);
ASSERT_EQ(s5.size(), 1);
assertRange(25, s5[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::EMPTY);

auto holder1 = cache.getOrSet(key, 30, 2, {}); /// Get [30, 31]
auto s1 = fromHolder(holder1);
ASSERT_EQ(s1.size(), 1);
assertRange(26, s1[0], DB::FileSegment::Range(30, 31), DB::FileSegment::State::EMPTY);

ASSERT_TRUE(s5[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, s5[0]);
prepareAndDownload(cache_base_path, s1[0]);
s5[0]->completeWithoutState();
s1[0]->completeWithoutState();

/// Current cache: [___]     [_][___][_]   [__]
///                ^   ^     ^  ^    ^^    ^  ^
///                2   4     23 24  26 27  30 31

auto holder2 = cache.getOrSet(key, 23, 1, {}); /// Get [23, 23]
auto s2 = fromHolder(holder2);
ASSERT_EQ(s2.size(), 1);

auto holder3 = cache.getOrSet(key, 24, 3, {}); /// Get [24, 26]
auto s3 = fromHolder(holder3);
ASSERT_EQ(s3.size(), 1);

auto holder4 = cache.getOrSet(key, 27, 1, {}); /// Get [27, 27]
auto s4 = fromHolder(holder4);
ASSERT_EQ(s4.size(), 1);

/// All cache is now unreleasable because pointers are still held
auto holder6 = cache.getOrSet(key, 0, 40, {});
auto f = fromHolder(holder6);
ASSERT_EQ(f.size(), 9);

assertRange(27, f[0], DB::FileSegment::Range(0, 1), DB::FileSegment::State::EMPTY);
assertRange(28, f[2], DB::FileSegment::Range(5, 22), DB::FileSegment::State::EMPTY);
assertRange(29, f[6], DB::FileSegment::Range(28, 29), DB::FileSegment::State::EMPTY);
assertRange(30, f[8], DB::FileSegment::Range(32, 39), DB::FileSegment::State::EMPTY);

ASSERT_TRUE(f[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(f[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(f[6]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(f[8]->getOrSetDownloader() == DB::FileSegment::getCallerId());

ASSERT_FALSE(f[0]->reserve(1));
ASSERT_FALSE(f[2]->reserve(1));
ASSERT_FALSE(f[6]->reserve(1));
ASSERT_FALSE(f[8]->reserve(1));
}
/// 17 21 2324 26 27
assertEqual(cache.getSnapshot(key), { Range(17, 20), Range(21, 21), Range(23, 23), Range(24, 26), Range(27, 27) });
assertEqual(cache.dumpQueue(), { Range(17, 20), Range(21, 21), Range(23, 23), Range(24, 26), Range(27, 27) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 10);

std::cerr << "Step 8\n";
{
auto holder = cache.getOrSet(key, 2, 3, {}); /// Get [2, 4]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 1);
assertRange(31, segments[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
assertEqual(holder, { Range(2, 4) }, { State::EMPTY });

auto holder2 = cache.getOrSet(key, 30, 2, {}); /// Get [30, 31]
assertEqual(holder2, { Range(30, 31) }, { State::EMPTY });

download(get(holder, 0));
download(get(holder2, 0));

auto holder3 = cache.getOrSet(key, 23, 1, {}); /// Get [23, 23]
assertEqual(holder3, { Range(23, 23) }, { State::DOWNLOADED });

auto holder4 = cache.getOrSet(key, 24, 3, {}); /// Get [24, 26]
assertEqual(holder4, { Range(24, 26) }, { State::DOWNLOADED });

auto holder5 = cache.getOrSet(key, 27, 1, {}); /// Get [27, 27]
assertEqual(holder5, { Range(27, 27) }, { State::DOWNLOADED });

auto holder6 = cache.getOrSet(key, 0, 40, {});
assertEqual(holder6,
{ Range(0, 1), Range(2, 4), Range(5, 22), Range(23, 23), Range(24, 26), Range(27, 27), Range(28, 29), Range(30, 31), Range(32, 39) },
{ State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY });

assertDownloadFails(get(holder6, 0));
assertDownloadFails(get(holder6, 2));
assertDownloadFails(get(holder6, 6));
assertDownloadFails(get(holder6, 8));

increasePriority(holder);
increasePriority(holder2);
increasePriority(holder3);
increasePriority(holder4);
increasePriority(holder5);
increasePriority(holder6);
}

/// Current cache: [___]     [_][___][_]   [__]
///                ^   ^     ^  ^    ^^    ^  ^
///                2   4     23 24  26 27  30 31
assertEqual(cache.getSnapshot(key), { Range(2, 4), Range(23, 23), Range(24, 26), Range(27, 27), Range(30, 31) });
assertEqual(cache.dumpQueue(), { Range(2, 4), Range(23, 23), Range(24, 26), Range(27, 27), Range(30, 31) });

std::cerr << "Step 9\n";

/// Get [2, 4]
{
auto holder = cache.getOrSet(key, 2, 3, {});
assertEqual(holder, { Range(2, 4) }, { State::DOWNLOADED });
increasePriority(holder);
}


{
auto holder = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 3);
assertEqual(holder,
{ Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });

assertRange(32, segments[0], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(33, segments[1], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);

assertRange(34, segments[2], DB::FileSegment::Range(28, 29), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADING);
auto & file_segment = get(holder, 2);
ASSERT_TRUE(file_segment.getOrSetDownloader() == FileSegment::getCallerId());
ASSERT_TRUE(file_segment.state() == State::DOWNLOADING);

bool lets_start_download = false;
std::mutex mutex;
@@ -403,16 +451,13 @@ TEST_F(FileCacheTest, get)
chassert(&DB::CurrentThread::get() == &thread_status_1);
DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);

auto holder_2 = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29] once again.
auto segments_2 = fromHolder(holder_2);
ASSERT_EQ(segments.size(), 3);
auto holder2 = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29] once again.
assertEqual(holder2,
{ Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADING });

assertRange(35, segments_2[0], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(36, segments_2[1], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
assertRange(37, segments_2[2], DB::FileSegment::Range(28, 29), DB::FileSegment::State::DOWNLOADING);

ASSERT_TRUE(segments[2]->getOrSetDownloader() != DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADING);
auto & file_segment2 = get(holder2, 2);
ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId());

{
std::lock_guard lock(mutex);
@@ -420,8 +465,8 @@ TEST_F(FileCacheTest, get)
}
cv.notify_one();

segments_2[2]->wait();
ASSERT_TRUE(segments_2[2]->state() == DB::FileSegment::State::DOWNLOADED);
file_segment2.wait(file_segment2.range().left);
ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED);
});

{
@@ -429,35 +474,34 @@ TEST_F(FileCacheTest, get)
cv.wait(lock, [&]{ return lets_start_download; });
}

prepareAndDownload(cache_base_path, segments[2]);
segments[2]->completeWithoutState();
ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED);
download(file_segment);
ASSERT_TRUE(file_segment.state() == State::DOWNLOADED);

other_1.join();

increasePriority(holder);
}

/// Current cache: [___]     [___][_][__][__]
///                ^   ^     ^    ^^   ^^    ^
///                2   4     24  26 27  2930 31
assertEqual(cache.getSnapshot(key), { Range(2, 4), Range(24, 26), Range(27, 27), Range(28, 29), Range(30, 31) });
assertEqual(cache.dumpQueue(), { Range(30, 31), Range(2, 4), Range(24, 26), Range(27, 27), Range(28, 29) });

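/// The cross-thread handoff above, condensed (a sketch reusing this test's
/// helpers; holder2 is the second reader's holder for the same range):
///
///     auto & seg = get(holder2, 2);
///     ASSERT_TRUE(seg.getOrSetDownloader() != FileSegment::getCallerId());   /// someone else downloads
///     seg.wait(seg.range().left);                                            /// block until they finish
///     ASSERT_TRUE(seg.state() == State::DOWNLOADED);
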
std::cerr << "Step 10\n";
{
/// Now let's check a similar case, but this time the state changes after segment->wait():
/// the state is changed not manually via segment->completeWithState(state) but from the
/// holder's destructor, which is also where notify_all() is called.

std::optional<DB::FileSegmentsHolder> holder;
holder.emplace(cache.getOrSet(key, 3, 23, {})); /// Get [3, 25]
auto holder = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25]
assertEqual(holder,
{ Range(2, 4), Range(5, 23), Range(24, 26) },
{ State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });

auto segments = fromHolder(*holder);
ASSERT_EQ(segments.size(), 3);

assertRange(38, segments[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);

assertRange(39, segments[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[1]->state() == DB::FileSegment::State::DOWNLOADING);

assertRange(40, segments[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
auto & file_segment = get(holder, 1);
ASSERT_TRUE(file_segment.getOrSetDownloader() == FileSegment::getCallerId());
ASSERT_TRUE(file_segment.state() == State::DOWNLOADING);

bool lets_start_download = false;
std::mutex mutex;
@@ -472,16 +516,13 @@ TEST_F(FileCacheTest, get)
chassert(&DB::CurrentThread::get() == &thread_status_1);
DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);

auto holder_2 = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] once again
auto segments_2 = fromHolder(*holder);
ASSERT_EQ(segments_2.size(), 3);
auto holder2 = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] once again
assertEqual(holder,
{ Range(2, 4), Range(5, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADING, State::DOWNLOADED });

assertRange(41, segments_2[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
assertRange(42, segments_2[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::DOWNLOADING);
assertRange(43, segments_2[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);

ASSERT_TRUE(segments_2[1]->getDownloader() != DB::FileSegment::getCallerId());
ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::DOWNLOADING);
auto & file_segment2 = get(holder, 1);
ASSERT_TRUE(file_segment2.getDownloader() != FileSegment::getCallerId());

{
std::lock_guard lock(mutex);
@@ -489,13 +530,10 @@ TEST_F(FileCacheTest, get)
}
cv.notify_one();

segments_2[1]->wait();
printRanges(segments_2);
ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED);

ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments_2[1]);
segments_2[1]->completeWithoutState();
file_segment2.wait(file_segment2.range().left);
ASSERT_TRUE(file_segment2.state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED);
ASSERT_TRUE(file_segment2.getOrSetDownloader() == DB::FileSegment::getCallerId());
download(file_segment2);
});

{
@@ -505,8 +543,7 @@ TEST_F(FileCacheTest, get)

holder.reset();
other_1.join();
printRanges(segments);
ASSERT_TRUE(segments[1]->state() == DB::FileSegment::State::DOWNLOADED);
ASSERT_TRUE(file_segment.state() == DB::FileSegment::State::DOWNLOADED);
}
}

@@ -514,55 +551,103 @@ TEST_F(FileCacheTest, get)
/// ^ ^^ ^ ^^ ^ ^
/// 2 45 24 2627 28 29

std::cerr << "Step 11\n";
{
/// Test LRUCache::restore().

auto cache2 = DB::FileCache(settings);
cache2.initialize();
auto key = cache2.hash("key1");
auto key = cache2.createKeyForPath("key1");

auto holder1 = cache2.getOrSet(key, 2, 28, {}); /// Get [2, 29]

auto segments1 = fromHolder(holder1);
ASSERT_EQ(segments1.size(), 5);

assertRange(44, segments1[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
assertRange(45, segments1[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::DOWNLOADED);
assertRange(45, segments1[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(46, segments1[3], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
assertRange(47, segments1[4], DB::FileSegment::Range(28, 29), DB::FileSegment::State::DOWNLOADED);
/// Get [2, 29]
assertEqual(cache2.getOrSet(key, 2, 28, {}),
{ Range(2, 4), Range(5, 23), Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED });
}

std::cerr << "Step 12\n";
{
/// Test max file segment size

auto settings2 = settings;
settings2.max_file_segment_size = 10;
settings2.base_path = caches_dir / "cache2";
fs::create_directories(settings2.base_path);
auto cache2 = DB::FileCache(settings2);
cache2.initialize();
auto key = cache2.hash("key1");
auto key = cache2.createKeyForPath("key1");

auto holder1 = cache2.getOrSet(key, 0, 25, {}); /// Get [0, 24]
auto segments1 = fromHolder(holder1);

ASSERT_EQ(segments1.size(), 3);
assertRange(48, segments1[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::EMPTY);
assertRange(49, segments1[1], DB::FileSegment::Range(10, 19), DB::FileSegment::State::EMPTY);
assertRange(50, segments1[2], DB::FileSegment::Range(20, 24), DB::FileSegment::State::EMPTY);
/// Get [0, 24]
assertEqual(cache2.getOrSet(key, 0, 25, {}),
{ Range(0, 9), Range(10, 19), Range(20, 24) },
{ State::EMPTY, State::EMPTY, State::EMPTY });
}

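/// I.e. with max_file_segment_size = 10, a request for [0, 24] (25 bytes) is split
/// into ceil(25 / 10) = 3 file segments: [0, 9], [10, 19] and [20, 24].
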
std::cerr << "Step 13\n";
{
/// Test delayed cleanup

auto cache = FileCache(settings);
cache.initialize();
cache.cleanup();
const auto key = cache.createKeyForPath("key10");
const auto key_path = cache.getPathInLocalCache(key);

cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));

download(cache.getOrSet(key, 0, 10, {}));
ASSERT_EQ(cache.getUsedCacheSize(), 10);
ASSERT_TRUE(fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular)));

cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
ASSERT_TRUE(fs::exists(key_path));
ASSERT_TRUE(!fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular)));

cache.cleanup();
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));
}

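/// The split of responsibilities exercised above, in short (a sketch using the
/// same API): removeAllReleasable() drops the segment data immediately, while
/// the key directory lingers until cleanup() runs:
///
///     cache.removeAllReleasable();   /// data gone, key_path still exists
///     cache.cleanup();               /// now key_path is removed as well
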
std::cerr << "Step 14\n";
{
/// Test background thread delayed cleanup

auto settings2{settings};
settings2.delayed_cleanup_interval_ms = 0;
auto cache = DB::FileCache(settings2);
cache.initialize();
const auto key = cache.createKeyForPath("key10");
const auto key_path = cache.getPathInLocalCache(key);

cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));

download(cache.getOrSet(key, 0, 10, {}));
ASSERT_EQ(cache.getUsedCacheSize(), 10);
ASSERT_TRUE(fs::exists(key_path));

cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
sleepForSeconds(2);
ASSERT_TRUE(!fs::exists(key_path));
}
}

TEST_F(FileCacheTest, writeBuffer)
{
DB::FileCacheSettings settings;
FileCacheSettings settings;
settings.max_size = 100;
settings.max_elements = 5;
settings.max_file_segment_size = 5;
settings.base_path = cache_base_path;

DB::FileCache cache(settings);
FileCache cache(settings);
cache.initialize();

auto write_to_cache = [&cache](const String & key, const Strings & data, bool flush)
@@ -571,10 +656,13 @@ TEST_F(FileCacheTest, writeBuffer)
segment_settings.kind = FileSegmentKind::Temporary;
segment_settings.unbounded = true;

auto holder = cache.set(cache.hash(key), 0, 3, segment_settings);
EXPECT_EQ(holder.file_segments.size(), 1);
auto & segment = holder.file_segments.front();
WriteBufferToFileSegment out(segment.get());
auto cache_key = cache.createKeyForPath(key);
auto holder = cache.set(cache_key, 0, 3, segment_settings);
/// The same is done in TemporaryDataOnDisk::createStreamToCacheFile.
std::filesystem::create_directories(cache.getPathInLocalCache(cache_key));
EXPECT_EQ(holder->size(), 1);
auto & segment = holder->front();
WriteBufferToFileSegment out(&segment);
std::list<std::thread> threads;
std::mutex mu;
for (const auto & s : data)
@@ -600,18 +688,18 @@ TEST_F(FileCacheTest, writeBuffer)
std::vector<fs::path> file_segment_paths;
{
auto holder = write_to_cache("key1", {"abc", "defg"}, false);
file_segment_paths.emplace_back(holder.file_segments.front()->getPathInLocalCache());
file_segment_paths.emplace_back(holder->front().getPathInLocalCache());

ASSERT_EQ(fs::file_size(file_segment_paths.back()), 7);
ASSERT_TRUE(holder.file_segments.front()->range() == FileSegment::Range(0, 7));
ASSERT_TRUE(holder->front().range() == FileSegment::Range(0, 7));
ASSERT_EQ(cache.getUsedCacheSize(), 7);

{
auto holder2 = write_to_cache("key2", {"1", "22", "333", "4444", "55555"}, true);
file_segment_paths.emplace_back(holder2.file_segments.front()->getPathInLocalCache());
file_segment_paths.emplace_back(holder2->front().getPathInLocalCache());

ASSERT_EQ(fs::file_size(file_segment_paths.back()), 15);
ASSERT_TRUE(holder2.file_segments.front()->range() == FileSegment::Range(0, 15));
ASSERT_TRUE(holder2->front().range() == FileSegment::Range(0, 15));
ASSERT_EQ(cache.getUsedCacheSize(), 22);
}
ASSERT_FALSE(fs::exists(file_segment_paths.back()));
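/// For orientation, the write path this test drives, as a sketch (same helpers
/// and types as above; key and data stand for the lambda's parameters):
///
///     CreateFileSegmentSettings segment_settings;
///     segment_settings.kind = FileSegmentKind::Temporary;
///     segment_settings.unbounded = true;
///     auto holder = cache.set(cache.createKeyForPath(key), 0, 3, segment_settings);
///     WriteBufferToFileSegment out(&holder->front());
///     for (const auto & s : data)
///         out.write(s.data(), s.size());   /// unbounded: the segment grows past its initial size
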
@@ -668,17 +756,16 @@ TEST_F(FileCacheTest, temporaryData)

auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, 0);

auto some_data_holder = file_cache.getOrSet(file_cache.hash("some_data"), 0, 5_KiB, CreateFileSegmentSettings{});
auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, CreateFileSegmentSettings{});

{
auto segments = fromHolder(some_data_holder);
ASSERT_EQ(segments.size(), 5);
for (auto & segment : segments)
ASSERT_EQ(some_data_holder->size(), 5);
for (auto & segment : *some_data_holder)
{
ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segment->reserve(segment->range().size()));
download(cache_base_path, segment);
segment->completeWithoutState();
download(*segment);
segment->complete();
}
}

@@ -18,6 +18,7 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes()
{"cache_name", std::make_shared<DataTypeString>()},
{"cache_base_path", std::make_shared<DataTypeString>()},
{"cache_path", std::make_shared<DataTypeString>()},
{"key", std::make_shared<DataTypeString>()},
{"file_segment_range_begin", std::make_shared<DataTypeUInt64>()},
{"file_segment_range_end", std::make_shared<DataTypeUInt64>()},
{"size", std::make_shared<DataTypeUInt64>()},
@@ -45,27 +46,27 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex
const auto & cache = cache_data->cache;
auto file_segments = cache->getSnapshot();

for (const auto & file_segment : file_segments)
for (const auto & file_segment : *file_segments)
{
res_columns[0]->insert(cache_name);
res_columns[1]->insert(cache->getBasePath());

/// Do not use `file_segment->getPathInLocalCache` here because it will lead to nullptr dereference
/// (because file_segments in getSnapshot doesn't have `cache` field set)
res_columns[2]->insert(
cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind()));
res_columns[2]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind()));
res_columns[3]->insert(file_segment->key().toString());

const auto & range = file_segment->range();
res_columns[3]->insert(range.left);
res_columns[4]->insert(range.right);
res_columns[5]->insert(range.size());
res_columns[6]->insert(FileSegment::stateToString(file_segment->state()));
res_columns[7]->insert(file_segment->getHitsCount());
res_columns[8]->insert(file_segment->getRefCount());
res_columns[9]->insert(file_segment->getDownloadedSize());
res_columns[10]->insert(file_segment->isPersistent());
res_columns[11]->insert(toString(file_segment->getKind()));
res_columns[12]->insert(file_segment->isUnbound());
res_columns[4]->insert(range.left);
res_columns[5]->insert(range.right);
res_columns[6]->insert(range.size());
res_columns[7]->insert(FileSegment::stateToString(file_segment->state()));
res_columns[8]->insert(file_segment->getHitsCount());
res_columns[9]->insert(file_segment->getRefCount());
res_columns[10]->insert(file_segment->getDownloadedSize(false));
res_columns[11]->insert(file_segment->isPersistent());
res_columns[12]->insert(toString(file_segment->getKind()));
res_columns[13]->insert(file_segment->isUnbound());
}
}
}

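/// Note on the renumbering above: the new "key" column is inserted at
/// res_columns[3], so every column after it shifts up by one index; the change
/// from getDownloadedSize() to getDownloadedSize(false) is the only other edit.
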
@@ -1,4 +1,5 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeEnum.h>
@@ -55,15 +56,38 @@ namespace
if constexpr (std::is_same_v<Factory, FunctionFactory>)
{
if (factory.isAlias(name))
{
res_columns[6]->insertDefault();
res_columns[7]->insertDefault();
res_columns[8]->insertDefault();
res_columns[9]->insertDefault();
res_columns[10]->insertDefault();
res_columns[11]->insertDefault();
}
else
res_columns[6]->insert(factory.getDocumentation(name).description);
{
auto documentation = factory.getDocumentation(name);
res_columns[6]->insert(documentation.description);
res_columns[7]->insertDefault();
res_columns[8]->insertDefault();
res_columns[9]->insertDefault();
res_columns[10]->insert(documentation.examplesAsString());
res_columns[11]->insert(documentation.categoriesAsString());
}
}
else
{
res_columns[6]->insertDefault();
res_columns[7]->insertDefault();
res_columns[8]->insertDefault();
res_columns[9]->insertDefault();
res_columns[10]->insertDefault();
res_columns[11]->insertDefault();
}
}
}


std::vector<std::pair<String, Int8>> getOriginEnumsAndValues()
{
return std::vector<std::pair<String, Int8>>{
@@ -83,6 +107,11 @@ NamesAndTypesList StorageSystemFunctions::getNamesAndTypes()
{"create_query", std::make_shared<DataTypeString>()},
{"origin", std::make_shared<DataTypeEnum8>(getOriginEnumsAndValues())},
{"description", std::make_shared<DataTypeString>()},
{"syntax", std::make_shared<DataTypeString>()},
{"arguments", std::make_shared<DataTypeString>()},
{"returned_value", std::make_shared<DataTypeString>()},
{"examples", std::make_shared<DataTypeString>()},
{"categories", std::make_shared<DataTypeString>()}
};
}

@@ -82,7 +82,7 @@ Pipe StorageSystemRemoteDataPaths::read(

if (cache)
{
auto cache_paths = cache->tryGetCachePaths(cache->hash(object.getPathKeyForCache()));
auto cache_paths = cache->tryGetCachePaths(cache->createKeyForPath(object.getPathKeyForCache()));
col_cache_paths->insert(Array(cache_paths.begin(), cache_paths.end()));
}
else

@@ -55,52 +55,58 @@
<type>cache</type>
<disk>s3_disk</disk>
<path>s3_cache/</path>
<max_size>2147483648</max_size>
<max_size>128Mi</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache>
<s3_cache_2>
<type>cache</type>
<disk>s3_disk_2</disk>
<path>s3_cache_2/</path>
<max_size>2Gi</max_size>
<max_size>128Mi</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<max_file_segment_size>100Mi</max_file_segment_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_2>
<s3_cache_3>
<type>cache</type>
<disk>s3_disk_3</disk>
<path>s3_disk_3_cache/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<data_cache_max_size>22548578304</data_cache_max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_cache_hits_threshold>1</enable_cache_hits_threshold>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_3>
<s3_cache_4>
<type>cache</type>
<disk>s3_disk_4</disk>
<path>s3_cache_4/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_filesystem_query_cache_limit>1</enable_filesystem_query_cache_limit>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_4>
<s3_cache_5>
<type>cache</type>
<disk>s3_disk_5</disk>
<path>s3_cache_5/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_5>
<s3_cache_6>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_6/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<enable_bypass_cache_with_threashold>1</enable_bypass_cache_with_threashold>
<bypass_cache_threashold>100</bypass_cache_threashold>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_6>
<s3_cache_small>
<type>cache</type>
@@ -108,15 +114,17 @@
<path>s3_cache_small/</path>
<max_size>1000</max_size>
<do_not_evict_index_and_mark_files>1</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small>
<s3_cache_small_segment_size>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_small_segment_size/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<max_file_segment_size>10Ki</max_file_segment_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<cache_on_write_operations>1</cache_on_write_operations>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small_segment_size>
<!-- local disks -->
<local_disk>
@@ -139,6 +147,7 @@
<max_size>22548578304</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</local_cache>
<local_cache_2>
<type>cache</type>
@@ -146,6 +155,7 @@
<path>local_cache_2/</path>
<max_size>22548578304</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</local_cache_2>
<local_cache_3>
<type>cache</type>
@@ -155,6 +165,7 @@
<cache_on_write_operations>1</cache_on_write_operations>
<enable_cache_hits_threshold>1</enable_cache_hits_threshold>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</local_cache_3>
<!-- multi layer cache -->
<s3_cache_multi>
@@ -163,6 +174,7 @@
<path>s3_cache_multi/</path>
<max_size>22548578304</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_multi>
<s3_cache_multi_2>
<type>cache</type>
@@ -170,6 +182,7 @@
<path>s3_cache_multi_2/</path>
<max_size>22548578304</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_multi_2>
</disks>
<policies>

@@ -11,6 +11,7 @@
<skip_access_check>true</skip_access_check>
<!-- Avoid extra retries to speed up tests -->
<retry_attempts>0</retry_attempts>
<connect_timeout_ms>20000</connect_timeout_ms>
</s3>
<s3_retryable>
<type>s3</type>
@@ -20,6 +21,7 @@
<secret_access_key>minio123</secret_access_key>
<!-- ClickHouse starts earlier than the custom S3 endpoint. Skip the access check to avoid failure on start-up -->
<skip_access_check>true</skip_access_check>
<connect_timeout_ms>20000</connect_timeout_ms>
</s3_retryable>
<s3_no_retries>
<type>s3</type>
@@ -32,6 +34,7 @@
<!-- Avoid extra retries to speed up tests -->
<s3_retry_attempts>1</s3_retry_attempts>
<s3_max_single_read_retries>1</s3_max_single_read_retries>
<connect_timeout_ms>20000</connect_timeout_ms>
</s3_no_retries>
<default/>
</disks>

@@ -281,7 +281,12 @@ CREATE TABLE system.functions
`alias_to` String,
`create_query` String,
`origin` Enum8('System' = 0, 'SQLUserDefined' = 1, 'ExecutableUserDefined' = 2),
`description` String
`description` String,
`syntax` String,
`arguments` String,
`returned_value` String,
`examples` String,
`categories` String
)
ENGINE = SystemFunctions
COMMENT 'SYSTEM TABLE is built on the fly.'

@@ -40,6 +40,8 @@
{'key1':1111,'key2':2222,'key5':500,'key6':600}
{'key1':1112,'key2':2224,'key5':500,'key6':600}
{'key1':1113,'key2':2226,'key5':500,'key6':600}
{'key5':500,'key6':600}
{'key5':500,'key6':600}
1
1
1

@@ -11,6 +11,8 @@ SELECT mapApply((k, v) -> tuple(v + 9223372036854775806), col) FROM table_map; -

SELECT mapConcat(col, map('key5', 500), map('key6', 600)) FROM table_map ORDER BY id;
SELECT mapConcat(col, materialize(map('key5', 500)), map('key6', 600)) FROM table_map ORDER BY id;
SELECT concat(map('key5', 500), map('key6', 600));
SELECT map('key5', 500) || map('key6', 600);

SELECT mapExists((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id;
SELECT mapExists((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id;

@@ -1,10 +1,60 @@
Using storage policy: s3_cache
0 79 80
0 745 746
0 745 746
0 745 746
0
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Using storage policy: local_cache
0 79 80
0 745 746
0 745 746
0 745 746
0
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache

@@ -9,34 +9,69 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

for STORAGE_POLICY in 's3_cache' 'local_cache'; do
echo "Using storage policy: $STORAGE_POLICY"
${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES"
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.filesystem_cache"

${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES test_02240_storage_policy"
${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy SELECT number, toString(number) FROM numbers(100)"

echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";

echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";

${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"

echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";

${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"

${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)"

echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";

echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";

echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"

echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";

${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
done

@@ -1,2 +1,2 @@
2147483648 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0
2147483648 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0
134217728 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0
134217728 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0