Merge remote-tracking branch 'upstream/master' into better-local-object-storage

This commit is contained in:
kssenii 2023-04-28 12:33:17 +02:00
commit 298ed454ce
57 changed files with 3398 additions and 2540 deletions

View File

@ -176,6 +176,12 @@ if (OS_DARWIN)
set (ENABLE_CURL_BUILD OFF)
endif ()
option(ENABLE_ISAL_LIBRARY "Enable ISA-L library ON by default except on aarch64." ON)
if (ARCH_AARCH64)
# Disable ISA-L library on aarch64.
set (ENABLE_ISAL_LIBRARY OFF)
endif ()
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# Can be lld or ld-lld or lld-13 or /path/to/lld.
if (LINKER_NAME MATCHES "lld")

View File

@ -191,7 +191,9 @@ add_contrib (google-benchmark-cmake google-benchmark)
add_contrib (ulid-c-cmake ulid-c)
add_contrib (isa-l-cmake isa-l)
if (ENABLE_ISAL_LIBRARY)
add_contrib (isa-l-cmake isa-l)
endif()
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear

View File

@ -1,6 +1,12 @@
set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l")
# check nasm compiler
# The YASM and NASM assemblers are somewhat mutually compatible. ISAL specifically needs NASM. If only YASM is installed, then check_language(ASM_NASM)
# below happily finds YASM, leading to weird errors at build time. Therefore, do an explicit check for NASM here.
find_program(NASM_PATH NAMES nasm)
if (NOT NASM_PATH)
message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
endif ()
include(CheckLanguage)
check_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER)

View File

@ -172,8 +172,10 @@ if (TARGET OpenSSL::SSL)
target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
endif()
target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
add_definitions(-DHADOOP_ISAL_LIBRARY)
if (ENABLE_ISAL_LIBRARY)
target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
add_definitions(-DHADOOP_ISAL_LIBRARY)
endif()
add_library(ch_contrib::hdfs ALIAS _hdfs3)

View File

@ -22,7 +22,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
### Install Prerequisites {#install-prerequisites}
``` bash
sudo apt-get install git cmake ccache python3 ninja-build yasm gawk
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk
```
### Install and Use the Clang compiler
@ -92,7 +92,7 @@ If all the components are installed, you may build in the same way as the steps
Example for OpenSUSE Tumbleweed:
``` bash
sudo zypper install git cmake ninja clang-c++ python lld yasm gawk
sudo zypper install git cmake ninja clang-c++ python lld nasm yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build
cmake -S . -B build
@ -103,7 +103,7 @@ Example for Fedora Rawhide:
``` bash
sudo yum update
sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk
sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build
cmake -S . -B build

View File

@ -0,0 +1,118 @@
---
slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest
sidebar_position: 300
sidebar_label: kolmogorovSmirnovTest
---
# kolmogorovSmirnovTest
Applies Kolmogorov-Smirnov's test to samples from two populations.
**Syntax**
``` sql
kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index)
```
Values of both samples are in the `sample_data` column. If `sample_index` equals 0, then the value in that row belongs to the sample from the first population. Otherwise, it belongs to the sample from the second population.
Samples must belong to continuous, one-dimensional probability distributions.
**Arguments**
- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
**Parameters**
- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
Let F(x) and G(x) be the CDFs of the first and second distributions respectively.
- `'two-sided'`
The null hypothesis is that samples come from the same distribution, i.e. F(x) = G(x) for all x.
And the alternative is that the distributions are not identical.
- `'greater'`
The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one,
i.e. the CDF of the first distribution lies above and hence to the left of that for the second one.
Which in fact means that F(x) >= G(x) for all x. And the alternative in this case is that F(x) < G(x) for at least one x.
- `'less'`
The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one,
i.e. the CDF of the first distribution lies below and hence to the right of that for the second one.
Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x.
- `computation_method` — the method used to compute p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
- `'exact'` - calculation is performed using the precise probability distribution of the test statistic. Compute intensive and wasteful except for small samples.
- `'asymp'` - calculation is performed using an approximation. For large sample sizes, the exact and asymptotic p-values are very similar.
- `'auto'` - the `'exact'` method is used when the maximum number of samples is less than 10'000.
**Returned values**
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
- calculated statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT kolmogorovSmirnovTest('less', 'exact')(value, num)
FROM
(
SELECT
randNormal(0, 10) AS value,
0 AS num
FROM numbers(10000)
UNION ALL
SELECT
randNormal(0, 10) AS value,
1 AS num
FROM numbers(10000)
)
```
Result:
``` text
┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐
│ (0.009899999999999996,0.37528595205132287) │
└────────────────────────────────────────────────────┘
```
Note:
The p-value is greater than 0.05 (for a confidence level of 95%), so the null hypothesis is not rejected.
Query:
``` sql
SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num)
FROM
(
SELECT
randStudentT(10) AS value,
0 AS num
FROM numbers(100)
UNION ALL
SELECT
randNormal(0, 10) AS value,
1 AS num
FROM numbers(100)
)
```
Result:
``` text
┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐
│ (0.4100000000000002,6.61735760482795e-8) │
└─────────────────────────────────────────────────────────┘
```
Note:
The p-value is less than 0.05 (for a confidence level of 95%), so the null hypothesis is rejected.
**See Also**
- [Kolmogorov-Smirnov test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test)

View File

@ -26,6 +26,7 @@
#include <Common/TLDListsHolder.h>
#include <Common/quoteString.h>
#include <Common/randomSeed.h>
#include <Common/ThreadPool.h>
#include <Loggers/Loggers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>

View File

@ -0,0 +1,30 @@
#include <Common/Documentation.h>
namespace DB
{
std::string Documentation::examplesAsString() const
{
std::string res;
for (const auto & [example_name, example_query] : examples)
{
res += example_name + ":\n\n";
res += "```sql\n";
res += example_query + "\n";
res += "```\n";
}
return res;
}
std::string Documentation::categoriesAsString() const
{
if (categories.empty())
return "";
std::string res = categories[0];
for (size_t i = 1; i < categories.size(); ++i)
res += ", " + categories[i];
return res;
}
}

View File

@ -42,27 +42,44 @@ namespace DB
*
* Documentation does not support multiple languages.
* The only available language is English.
*
* TODO: Allow to specify Syntax, Argument(s) and a Returned Value.
* TODO: Organize Examples as a struct of ExampleName, ExampleQuery and ExampleResult.
*/
struct Documentation
{
using Description = std::string;
using Syntax = std::string;
using Argument = std::string;
using Arguments = std::vector<Argument>;
using ReturnedValue = std::string;
using ExampleName = std::string;
using ExampleQuery = std::string;
using Examples = std::map<ExampleName, ExampleQuery>;
using Category = std::string;
using Categories = std::vector<Category>;
using Related = std::string;
Description description;
Examples examples;
Categories categories;
Documentation(Description description_) : description(std::move(description_)) {}
Documentation(Description description_) : description(std::move(description_)) {} /// NOLINT
Documentation(Description description_, Examples examples_) : description(std::move(description_)), examples(std::move(examples_)) {}
Documentation(Description description_, Examples examples_, Categories categories_)
: description(std::move(description_)), examples(std::move(examples_)), categories(std::move(categories_)) {}
/// TODO: Please remove this constructor. Documentation should always be non-empty.
Documentation() {}
Documentation() = default;
std::string examplesAsString() const;
std::string categoriesAsString() const;
};
}

View File

@ -1,6 +1,7 @@
#include "CachedOnDiskReadBufferFromFile.h"
#include <Disks/IO/createReadBufferFromFileBase.h>
#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
#include <IO/ReadBufferFromFile.h>
#include <base/scope_guard.h>
#include <Common/assert_cast.h>
@ -115,27 +116,25 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
{
file_segments_holder.emplace(cache->get(cache_key, offset, size));
file_segments = cache->get(cache_key, offset, size);
}
else
{
CreateFileSegmentSettings create_settings(is_persistent ? FileSegmentKind::Persistent : FileSegmentKind::Regular);
file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size, create_settings));
file_segments = cache->getOrSet(cache_key, offset, size, create_settings);
}
/**
* Segments in returned list are ordered in ascending order and represent a full contiguous
* interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY.
*/
if (file_segments_holder->file_segments.empty())
if (file_segments->empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "List of file segments cannot be empty");
LOG_TEST(
log,
"Having {} file segments to read: {}, current offset: {}",
file_segments_holder->file_segments.size(), file_segments_holder->toString(), file_offset_of_buffer_end);
current_file_segment_it = file_segments_holder->file_segments.begin();
file_segments->size(), file_segments->toString(), file_offset_of_buffer_end);
initialized = true;
}
@ -165,7 +164,7 @@ CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segm
}
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getRemoteFSReadBuffer(FileSegment & file_segment, ReadType read_type_)
CachedOnDiskReadBufferFromFile::getRemoteReadBuffer(FileSegment & file_segment, ReadType read_type_)
{
switch (read_type_)
{
@ -201,7 +200,7 @@ CachedOnDiskReadBufferFromFile::getRemoteFSReadBuffer(FileSegment & file_segment
}
else
{
chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset());
chassert(remote_fs_segment_reader->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
}
return remote_fs_segment_reader;
@ -238,27 +237,27 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co
/// requested_range: [__________]
/// ^
/// current_offset
size_t first_non_downloaded_offset = file_segment.getFirstNonDownloadedOffset();
size_t first_non_downloaded_offset = file_segment.getFirstNonDownloadedOffset(true);
return first_non_downloaded_offset > current_offset;
}
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & file_segment)
CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_segment)
{
auto download_state = file_segment->state();
auto download_state = file_segment.state();
if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
{
if (download_state == FileSegment::State::DOWNLOADED)
{
read_type = ReadType::CACHED;
return getCacheReadBuffer(*file_segment);
return getCacheReadBuffer(file_segment);
}
else
{
LOG_TEST(log, "Bypassing cache because `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` option is used");
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
return getRemoteFSReadBuffer(*file_segment, read_type);
return getRemoteReadBuffer(file_segment, read_type);
}
}
@ -266,15 +265,15 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
{
switch (download_state)
{
case FileSegment::State::SKIP_CACHE:
case FileSegment::State::DETACHED:
{
LOG_TRACE(log, "Bypassing cache because file segment state is `SKIP_CACHE`");
LOG_TRACE(log, "Bypassing cache because file segment state is `DETACHED`");
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
return getRemoteFSReadBuffer(*file_segment, read_type);
return getRemoteReadBuffer(file_segment, read_type);
}
case FileSegment::State::DOWNLOADING:
{
if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
if (canStartFromCache(file_offset_of_buffer_end, file_segment))
{
/// segment{k} state: DOWNLOADING
/// cache: [______|___________
@ -285,21 +284,21 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// file_offset_of_buffer_end
read_type = ReadType::CACHED;
return getCacheReadBuffer(*file_segment);
return getCacheReadBuffer(file_segment);
}
download_state = file_segment->wait();
download_state = file_segment.wait(file_offset_of_buffer_end);
continue;
}
case FileSegment::State::DOWNLOADED:
{
read_type = ReadType::CACHED;
return getCacheReadBuffer(*file_segment);
return getCacheReadBuffer(file_segment);
}
case FileSegment::State::EMPTY:
case FileSegment::State::PARTIALLY_DOWNLOADED:
{
if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
if (canStartFromCache(file_offset_of_buffer_end, file_segment))
{
/// segment{k} state: PARTIALLY_DOWNLOADED
/// cache: [______|___________
@ -310,13 +309,13 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// file_offset_of_buffer_end
read_type = ReadType::CACHED;
return getCacheReadBuffer(*file_segment);
return getCacheReadBuffer(file_segment);
}
auto downloader_id = file_segment->getOrSetDownloader();
if (downloader_id == file_segment->getCallerId())
auto downloader_id = file_segment.getOrSetDownloader();
if (downloader_id == file_segment.getCallerId())
{
if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
if (canStartFromCache(file_offset_of_buffer_end, file_segment))
{
/// segment{k}
/// cache: [______|___________
@ -327,11 +326,12 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// file_offset_of_buffer_end
read_type = ReadType::CACHED;
file_segment->resetDownloader();
return getCacheReadBuffer(*file_segment);
file_segment.resetDownloader();
return getCacheReadBuffer(file_segment);
}
if (file_segment->getCurrentWriteOffset() < file_offset_of_buffer_end)
auto current_write_offset = file_segment.getCurrentWriteOffset(false);
if (current_write_offset < file_offset_of_buffer_end)
{
/// segment{1}
/// cache: [_____|___________
@ -341,25 +341,25 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// ^
/// file_offset_of_buffer_end
LOG_TEST(log, "Predownload. File segment info: {}", file_segment->getInfoForLog());
chassert(file_offset_of_buffer_end > file_segment->getCurrentWriteOffset());
bytes_to_predownload = file_offset_of_buffer_end - file_segment->getCurrentWriteOffset();
chassert(bytes_to_predownload < file_segment->range().size());
LOG_TEST(log, "Predownload. File segment info: {}", file_segment.getInfoForLog());
chassert(file_offset_of_buffer_end > current_write_offset);
bytes_to_predownload = file_offset_of_buffer_end - current_write_offset;
chassert(bytes_to_predownload < file_segment.range().size());
}
read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
return getRemoteFSReadBuffer(*file_segment, read_type);
return getRemoteReadBuffer(file_segment, read_type);
}
download_state = file_segment->state();
download_state = file_segment.state();
continue;
}
case FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION:
{
if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
if (canStartFromCache(file_offset_of_buffer_end, file_segment))
{
read_type = ReadType::CACHED;
return getCacheReadBuffer(*file_segment);
return getCacheReadBuffer(file_segment);
}
else
{
@ -367,7 +367,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
log,
"Bypassing cache because file segment state is `PARTIALLY_DOWNLOADED_NO_CONTINUATION` and downloaded part already used");
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
return getRemoteFSReadBuffer(*file_segment, read_type);
return getRemoteReadBuffer(file_segment, read_type);
}
}
}
@ -375,12 +375,12 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
}
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_segment)
CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegment & file_segment)
{
chassert(!file_segment->isDownloader());
chassert(file_offset_of_buffer_end >= file_segment->range().left);
chassert(!file_segment.isDownloader());
chassert(file_offset_of_buffer_end >= file_segment.range().left);
auto range = file_segment->range();
auto range = file_segment.range();
bytes_to_predownload = 0;
Stopwatch watch(CLOCK_MONOTONIC);
@ -392,17 +392,18 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds());
[[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
chassert(download_current_segment == file_segment->isDownloader());
chassert(download_current_segment == file_segment.isDownloader());
chassert(file_segment->range() == range);
chassert(file_segment.range() == range);
chassert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right);
LOG_TEST(
log,
"Current file segment: {}, read type: {}, current file offset: {}",
range.toString(),
"Current read type: {}, read offset: {}, impl read range: {}, file segment: {}",
toString(read_type),
file_offset_of_buffer_end);
file_offset_of_buffer_end,
read_buffer_for_file_segment->getFileOffsetOfBufferEnd(),
file_segment.getInfoForLog());
read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right]
@ -444,11 +445,11 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
}
case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
{
chassert(file_segment->isDownloader());
chassert(file_segment.isDownloader());
if (bytes_to_predownload)
{
size_t current_write_offset = file_segment->getCurrentWriteOffset();
const size_t current_write_offset = file_segment.getCurrentWriteOffset(false);
read_buffer_for_file_segment->seek(current_write_offset, SEEK_SET);
}
else
@ -458,7 +459,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
assert(read_buffer_for_file_segment->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end);
}
auto current_write_offset = file_segment->getCurrentWriteOffset();
const auto current_write_offset = file_segment.getCurrentWriteOffset(false);
if (current_write_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
{
throw Exception(
@ -469,7 +470,7 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
current_write_offset,
read_buffer_for_file_segment->getPosition(),
read_buffer_for_file_segment->getFileOffsetOfBufferEnd(),
file_segment->getInfoForLog());
file_segment.getInfoForLog());
}
break;
@ -483,52 +484,46 @@ CachedOnDiskReadBufferFromFile::getImplementationBuffer(FileSegmentPtr & file_se
bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext()
{
LOG_TEST(log, "Completed segment: {}", (*current_file_segment_it)->range().toString());
auto * current_file_segment = &file_segments->front();
auto completed_range = current_file_segment->range();
if (enable_logging)
appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type);
appendFilesystemCacheLog(completed_range, read_type);
auto file_segment_it = current_file_segment_it++;
auto & file_segment = *file_segment_it;
[[maybe_unused]] const auto & range = file_segment->range();
chassert(file_offset_of_buffer_end > range.right);
LOG_TEST(
log,
"Removing file segment: {}, downloader: {}, state: {}",
file_segment->range().toString(),
file_segment->getDownloader(),
file_segment->state());
/// Do not hold pointer to file segment if it is not needed anymore
/// so can become releasable and can be evicted from cache.
file_segment->completeWithoutState();
file_segments_holder->file_segments.erase(file_segment_it);
if (current_file_segment_it == file_segments_holder->file_segments.end())
return false;
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
chassert(file_offset_of_buffer_end > completed_range.right);
if (read_type == ReadType::CACHED)
(*current_file_segment_it)->incrementHitsCount();
{
chassert(current_file_segment->getDownloadedSize(true) == current_file_segment->range().size());
}
file_segments->popFront();
if (file_segments->empty())
return false;
current_file_segment = &file_segments->front();
current_file_segment->use();
implementation_buffer = getImplementationBuffer(*current_file_segment);
if (read_type == ReadType::CACHED)
current_file_segment->incrementHitsCount();
LOG_TEST(
log, "New segment range: {}, old range: {}",
current_file_segment->range().toString(), completed_range.toString());
LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString());
return true;
}
CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile()
{
if (enable_logging
&& file_segments_holder
&& current_file_segment_it != file_segments_holder->file_segments.end())
if (enable_logging && file_segments && !file_segments->empty())
{
appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type);
appendFilesystemCacheLog(file_segments->front().range(), read_type);
}
}
void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
{
Stopwatch predownload_watch(CLOCK_MONOTONIC);
SCOPE_EXIT({
@ -547,9 +542,10 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
/// download from offset a'' < a', but return buffer from offset a'.
LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId());
chassert(static_cast<size_t>(implementation_buffer->getPosition()) == file_segment->getCurrentWriteOffset());
size_t current_offset = file_segment->getCurrentWriteOffset();
const auto & current_range = file_segment->range();
/// chassert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment.getCurrentWriteOffset(false));
chassert(static_cast<size_t>(implementation_buffer->getPosition()) == file_segment.getCurrentWriteOffset(false));
size_t current_offset = file_segment.getCurrentWriteOffset(false);
const auto & current_range = file_segment.range();
while (true)
{
@ -574,7 +570,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
"current download offset: {}, expected: {}, eof: {}",
bytes_to_predownload,
current_range.toString(),
file_segment->getCurrentWriteOffset(),
file_segment.getCurrentWriteOffset(false),
file_offset_of_buffer_end,
implementation_buffer->eof());
@ -584,7 +580,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
{
nextimpl_working_buffer_offset = implementation_buffer->offset();
auto current_write_offset = file_segment->getCurrentWriteOffset();
auto current_write_offset = file_segment.getCurrentWriteOffset(false);
if (current_write_offset != static_cast<size_t>(implementation_buffer->getPosition())
|| current_write_offset != file_offset_of_buffer_end)
{
@ -596,7 +592,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
current_write_offset,
file_offset_of_buffer_end,
implementation_buffer->getPosition(),
file_segment->getInfoForLog());
file_segment.getInfoForLog());
}
}
@ -608,15 +604,15 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);
bool continue_predownload = file_segment->reserve(current_predownload_size);
bool continue_predownload = file_segment.reserve(current_predownload_size);
if (continue_predownload)
{
LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);
chassert(file_segment->getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));
bool success = writeCache(implementation_buffer->buffer().begin(), current_predownload_size, current_offset, *file_segment);
if (success)
continue_predownload = writeCache(implementation_buffer->buffer().begin(), current_predownload_size, current_offset, file_segment);
if (continue_predownload)
{
current_offset += current_predownload_size;
@ -626,13 +622,8 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
else
{
LOG_TEST(log, "Bypassing cache because writeCache (in predownload) method failed");
continue_predownload = false;
}
}
else
{
file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
}
if (!continue_predownload)
{
@ -652,21 +643,21 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
/// TODO: allow seek more than once with seek avoiding.
bytes_to_predownload = 0;
file_segment.completePartAndResetDownloader();
chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
chassert(file_segment->state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION
|| file_segment->state() == FileSegment::State::SKIP_CACHE);
LOG_TEST(log, "Bypassing cache because for {}", file_segment->getInfoForLog());
LOG_TEST(log, "Bypassing cache because for {}", file_segment.getInfoForLog());
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
swap(*implementation_buffer);
resetWorkingBuffer();
implementation_buffer = getRemoteFSReadBuffer(*file_segment, read_type);
implementation_buffer = getRemoteReadBuffer(file_segment, read_type);
swap(*implementation_buffer);
implementation_buffer->setReadUntilPosition(file_segment->range().right + 1); /// [..., range.right]
implementation_buffer->setReadUntilPosition(file_segment.range().right + 1); /// [..., range.right]
implementation_buffer->seek(file_offset_of_buffer_end, SEEK_SET);
LOG_TRACE(
@ -683,12 +674,12 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegmentPtr & file_segment)
bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
{
auto & file_segment = *current_file_segment_it;
auto current_read_range = file_segment->range();
auto current_state = file_segment->state();
auto & file_segment = file_segments->front();
const auto & current_read_range = file_segment.range();
auto current_state = file_segment.state();
chassert(current_read_range.left <= file_offset_of_buffer_end);
chassert(!file_segment->isDownloader());
chassert(!file_segment.isDownloader());
if (file_offset_of_buffer_end > current_read_range.right)
{
@ -707,7 +698,7 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
/// ^
/// file_offset_of_buffer_end
auto current_write_offset = file_segment->getCurrentWriteOffset();
auto current_write_offset = file_segment.getCurrentWriteOffset(true);
bool cached_part_is_finished = current_write_offset == file_offset_of_buffer_end;
LOG_TEST(log, "Current write offset: {}, file offset of buffer end: {}", current_write_offset, file_offset_of_buffer_end);
@ -715,7 +706,7 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
if (cached_part_is_finished)
{
/// TODO: makes sense to reuse local file reader if we return here with CACHED read type again?
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
implementation_buffer = getImplementationBuffer(file_segment);
return true;
}
@ -743,7 +734,7 @@ bool CachedOnDiskReadBufferFromFile::updateImplementationBufferIfNeeded()
* to read by marks range given to him. Therefore, each nextImpl() call, in case of
* READ_AND_PUT_IN_CACHE, starts with getOrSetDownloader().
*/
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
implementation_buffer = getImplementationBuffer(file_segment);
}
return true;
@ -794,15 +785,13 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
{
last_caller_id = FileSegment::getCallerId();
assertCorrectness();
if (file_offset_of_buffer_end == read_until_position)
return false;
if (!initialized)
initialize(file_offset_of_buffer_end, getTotalSizeToRead());
if (current_file_segment_it == file_segments_holder->file_segments.end())
if (file_segments->empty())
return false;
bool implementation_buffer_can_be_reused = false;
@ -812,25 +801,25 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
/// Save state of current file segment before it is completed.
nextimpl_step_log_info = getInfoForLog();
if (current_file_segment_it == file_segments_holder->file_segments.end())
if (file_segments->empty())
return;
auto & file_segment = *current_file_segment_it;
auto & file_segment = file_segments->front();
bool download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
if (download_current_segment)
{
bool need_complete_file_segment = file_segment->isDownloader();
bool need_complete_file_segment = file_segment.isDownloader();
if (need_complete_file_segment)
{
if (!implementation_buffer_can_be_reused)
file_segment->resetRemoteFileReader();
file_segment.resetRemoteFileReader();
file_segment->completePartAndResetDownloader();
file_segment.completePartAndResetDownloader();
}
}
chassert(!file_segment->isDownloader());
chassert(!file_segment.isDownloader());
}
catch (...)
{
@ -848,10 +837,10 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
}
else
{
implementation_buffer = getImplementationBuffer(*current_file_segment_it);
implementation_buffer = getImplementationBuffer(file_segments->front());
if (read_type == ReadType::CACHED)
(*current_file_segment_it)->incrementHitsCount();
file_segments->front().incrementHitsCount();
}
chassert(!internal_buffer.empty());
@ -862,16 +851,16 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
// the caller doesn't try to use this CachedOnDiskReadBufferFromFile after it threw an exception.)
swap(*implementation_buffer);
auto & file_segment = *current_file_segment_it;
auto current_read_range = file_segment->range();
auto & file_segment = file_segments->front();
const auto & current_read_range = file_segment.range();
LOG_TEST(
log,
"Current count: {}, position: {}, buffer end: {}, file segment: {}",
implementation_buffer->count(),
implementation_buffer->getPosition(),
"Current read type: {}, read offset: {}, impl offset: {}, file segment: {}",
toString(read_type),
file_offset_of_buffer_end,
implementation_buffer->getFileOffsetOfBufferEnd(),
file_segment->getInfoForLog());
file_segment.getInfoForLog());
chassert(current_read_range.left <= file_offset_of_buffer_end);
chassert(current_read_range.right >= file_offset_of_buffer_end);
@ -889,12 +878,12 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
}
auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
if (download_current_segment != file_segment->isDownloader())
if (download_current_segment != file_segment.isDownloader())
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Incorrect segment state. Having read type: {}, file segment info: {}",
toString(read_type), file_segment->getInfoForLog());
toString(read_type), file_segment.getInfoForLog());
}
if (!result)
@ -936,7 +925,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
log,
"Read {} bytes, read type {}, position: {}, offset: {}, segment end: {}",
size, toString(read_type), implementation_buffer->getPosition(),
implementation_buffer->getFileOffsetOfBufferEnd(), file_segment->range().right);
implementation_buffer->getFileOffsetOfBufferEnd(), file_segment.range().right);
if (read_type == ReadType::CACHED)
{
@ -954,20 +943,20 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
{
if (download_current_segment)
{
chassert(file_offset_of_buffer_end + size - 1 <= file_segment->range().right);
chassert(file_offset_of_buffer_end + size - 1 <= file_segment.range().right);
bool success = file_segment->reserve(size);
bool success = file_segment.reserve(size);
if (success)
{
chassert(file_segment->getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
chassert(file_segment.getCurrentWriteOffset(false) == static_cast<size_t>(implementation_buffer->getPosition()));
success = writeCache(implementation_buffer->position(), size, file_offset_of_buffer_end, *file_segment);
success = writeCache(implementation_buffer->position(), size, file_offset_of_buffer_end, file_segment);
if (success)
{
chassert(file_segment->getCurrentWriteOffset() <= file_segment->range().right + 1);
chassert(file_segment.getCurrentWriteOffset(false) <= file_segment.range().right + 1);
chassert(
std::next(current_file_segment_it) == file_segments_holder->file_segments.end()
|| file_segment->getCurrentWriteOffset() == implementation_buffer->getFileOffsetOfBufferEnd());
/* last_file_segment */file_segments->size() == 1
|| file_segment.getCurrentWriteOffset(false) == implementation_buffer->getFileOffsetOfBufferEnd());
LOG_TEST(log, "Successfully written {} bytes", size);
@ -979,20 +968,13 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
}
else
{
chassert(file_segment->state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
chassert(file_segment.state() == FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
LOG_TRACE(log, "Bypassing cache because writeCache method failed");
}
}
else
{
LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size);
file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
}
if (!success)
{
read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE;
download_current_segment = false;
}
}
@ -1002,7 +984,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
/// Therefore need to resize to a smaller size. And resize must be done after write into cache.
/// - If last file segment was read from local fs, then we could read more than
/// file_segment->range().right, so resize is also needed.
if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end())
if (file_segments->size() == 1)
{
size_t remaining_size_to_read
= std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1;
@ -1022,17 +1004,17 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
// Not necessary because of the SCOPE_EXIT above, but useful for logging below.
if (download_current_segment)
file_segment->completePartAndResetDownloader();
file_segment.completePartAndResetDownloader();
chassert(!file_segment->isDownloader());
chassert(!file_segment.isDownloader());
LOG_TEST(
log,
"Key: {}. Returning with {} bytes, buffer position: {} (offset: {}, predownloaded: {}), "
"buffer available: {}, current range: {}, current offset: {}, file segment state: {}, "
"buffer available: {}, current range: {}, file offset of buffer end: {}, impl offset: {}, file segment state: {}, "
"current write offset: {}, read_type: {}, reading until position: {}, started with offset: {}, "
"remaining ranges: {}",
getHexUIntLowercase(cache_key),
cache_key.toString(),
working_buffer.size(),
getPosition(),
offset(),
@ -1040,12 +1022,13 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
available(),
current_read_range.toString(),
file_offset_of_buffer_end,
FileSegment::stateToString(file_segment->state()),
file_segment->getCurrentWriteOffset(),
implementation_buffer->getFileOffsetOfBufferEnd(),
FileSegment::stateToString(file_segment.state()),
file_segment.getCurrentWriteOffset(false),
toString(read_type),
read_until_position,
first_offset,
file_segments_holder->toString());
file_segments->toString());
if (size == 0 && file_offset_of_buffer_end < read_until_position)
{
@ -1064,7 +1047,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
cache_file_size ? std::to_string(cache_file_size) : "None",
cache_file_path,
implementation_buffer->getFileOffsetOfBufferEnd(),
file_segment->getInfoForLog());
file_segment.getInfoForLog());
}
return result;
@ -1112,13 +1095,13 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
first_offset = file_offset_of_buffer_end = new_pos;
resetWorkingBuffer();
// if (file_segments_holder && current_file_segment_it != file_segments_holder->file_segments.end())
// if (file_segments && current_file_segment_it != file_segments->file_segments.end())
// {
// auto & file_segments = file_segments_holder->file_segments;
// auto & file_segments = file_segments->file_segments;
// LOG_TRACE(
// log,
// "Having {} file segments to read: {}, current offset: {}",
// file_segments_holder->file_segments.size(), file_segments_holder->toString(), file_offset_of_buffer_end);
// file_segments->file_segments.size(), file_segments->toString(), file_offset_of_buffer_end);
// auto it = std::upper_bound(
// file_segments.begin(),
@ -1149,7 +1132,7 @@ off_t CachedOnDiskReadBufferFromFile::seek(off_t offset, int whence)
// }
// }
file_segments_holder.reset();
file_segments.reset();
implementation_buffer.reset();
initialized = false;
@ -1184,7 +1167,7 @@ void CachedOnDiskReadBufferFromFile::setReadUntilPosition(size_t position)
file_offset_of_buffer_end = getPosition();
resetWorkingBuffer();
file_segments_holder.reset();
file_segments.reset();
implementation_buffer.reset();
initialized = false;
@ -1203,25 +1186,9 @@ off_t CachedOnDiskReadBufferFromFile::getPosition()
return file_offset_of_buffer_end - available();
}
std::optional<size_t> CachedOnDiskReadBufferFromFile::getLastNonDownloadedOffset() const
{
if (!file_segments_holder)
throw Exception(ErrorCodes::LOGICAL_ERROR, "File segments holder not initialized");
const auto & file_segments = file_segments_holder->file_segments;
for (auto it = file_segments.rbegin(); it != file_segments.rend(); ++it)
{
const auto & file_segment = *it;
if (file_segment->state() != FileSegment::State::DOWNLOADED)
return file_segment->range().right;
}
return std::nullopt;
}
void CachedOnDiskReadBufferFromFile::assertCorrectness() const
{
if (FileCache::isReadOnly()
if (!CachedObjectStorage::canUseReadThroughCache()
&& !settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache usage is not allowed (query_id: {})", query_id);
}
@ -1229,16 +1196,16 @@ void CachedOnDiskReadBufferFromFile::assertCorrectness() const
String CachedOnDiskReadBufferFromFile::getInfoForLog()
{
String current_file_segment_info;
if (current_file_segment_it != file_segments_holder->file_segments.end())
current_file_segment_info = (*current_file_segment_it)->getInfoForLog();
else
if (file_segments->empty())
current_file_segment_info = "None";
else
current_file_segment_info = file_segments->front().getInfoForLog();
return fmt::format(
"Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, read_until_position: {}, "
"internal buffer end: {}, read_type: {}, last caller: {}, file segment info: {}",
source_file_path,
getHexUIntLowercase(cache_key),
cache_key.toString(),
file_offset_of_buffer_end,
read_until_position,
implementation_buffer ? std::to_string(implementation_buffer->getFileOffsetOfBufferEnd()) : "None",

View File

@ -62,26 +62,29 @@ public:
private:
using ImplementationBufferPtr = std::shared_ptr<ReadBufferFromFileBase>;
ImplementationBufferPtr getImplementationBuffer(FileSegmentPtr & file_segment);
void initialize(size_t offset, size_t size);
void assertCorrectness() const;
ImplementationBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment);
/**
* Return a list of file segments ordered in ascending order. This list represents
* a full contiguous interval (without holes).
*/
FileSegmentsHolderPtr getFileSegments(size_t offset, size_t size) const;
ImplementationBufferPtr getImplementationBuffer(FileSegment & file_segment);
ImplementationBufferPtr getReadBufferForFileSegment(FileSegment & file_segment);
ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const;
std::optional<size_t> getLastNonDownloadedOffset() const;
ImplementationBufferPtr getRemoteReadBuffer(FileSegment & file_segment, ReadType read_type_);
bool updateImplementationBufferIfNeeded();
void predownload(FileSegmentPtr & file_segment);
void predownload(FileSegment & file_segment);
bool nextImplStep();
void initialize(size_t offset, size_t size);
void assertCorrectness() const;
std::shared_ptr<ReadBufferFromFileBase> getRemoteFSReadBuffer(FileSegment & file_segment, ReadType read_type_);
size_t getTotalSizeToRead();
bool completeFileSegmentAndGetNext();
@ -108,8 +111,7 @@ private:
/// Remote read buffer, which can only be owned by current buffer.
FileSegment::RemoteFileReaderPtr remote_file_reader;
std::optional<FileSegmentsHolder> file_segments_holder;
FileSegments::iterator current_file_segment_it;
FileSegmentsHolderPtr file_segments;
ImplementationBufferPtr implementation_buffer;
bool initialized = false;
@ -143,7 +145,7 @@ private:
CurrentMetrics::Increment metric_increment{CurrentMetrics::FilesystemCacheReadBuffers};
ProfileEvents::Counters current_file_segment_counters;
FileCache::QueryContextHolder query_context_holder;
FileCache::QueryContextHolderPtr query_context_holder;
bool is_persistent;
};

View File

@ -50,27 +50,29 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset
offset, expected_write_offset);
}
auto & file_segments = file_segments_holder.file_segments;
FileSegment * file_segment;
if (file_segments.empty() || file_segments.back()->isDownloaded())
if (file_segments.empty() || file_segments.back().isDownloaded())
{
allocateFileSegment(expected_write_offset, segment_kind);
file_segment = &allocateFileSegment(expected_write_offset, segment_kind);
}
else
{
file_segment = &file_segments.back();
}
auto & file_segment = file_segments.back();
SCOPE_EXIT({
if (file_segments.back()->isDownloader())
file_segments.back()->completePartAndResetDownloader();
if (file_segments.back().isDownloader())
file_segments.back().completePartAndResetDownloader();
});
while (size > 0)
{
size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize();
size_t available_size = file_segment->range().size() - file_segment->getDownloadedSize(false);
if (available_size == 0)
{
completeFileSegment(*file_segment);
file_segment = allocateFileSegment(expected_write_offset, segment_kind);
file_segment = &allocateFileSegment(expected_write_offset, segment_kind);
continue;
}
@ -86,7 +88,6 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset
bool reserved = file_segment->reserve(size_to_write);
if (!reserved)
{
file_segment->completeWithState(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION);
appendFilesystemCacheLog(*file_segment);
LOG_DEBUG(
@ -113,11 +114,10 @@ void FileSegmentRangeWriter::finalize()
if (finalized)
return;
auto & file_segments = file_segments_holder.file_segments;
if (file_segments.empty())
return;
completeFileSegment(*file_segments.back());
completeFileSegment(file_segments.back());
finalized = true;
}
@ -134,24 +134,21 @@ FileSegmentRangeWriter::~FileSegmentRangeWriter()
}
}
FileSegmentPtr & FileSegmentRangeWriter::allocateFileSegment(size_t offset, FileSegmentKind segment_kind)
FileSegment & FileSegmentRangeWriter::allocateFileSegment(size_t offset, FileSegmentKind segment_kind)
{
/**
* Allocate a new file segment starting `offset`.
* File segment capacity will equal `max_file_segment_size`, but actual size is 0.
*/
std::lock_guard cache_lock(cache->mutex);
CreateFileSegmentSettings create_settings(segment_kind);
CreateFileSegmentSettings create_settings(segment_kind, false);
/// We set max_file_segment_size to be downloaded,
/// if we have less size to write, file segment will be resized in complete() method.
auto file_segment = cache->createFileSegmentForDownload(
key, offset, cache->max_file_segment_size, create_settings, cache_lock);
auto & file_segments = file_segments_holder.file_segments;
return *file_segments.insert(file_segments.end(), file_segment);
auto holder = cache->set(key, offset, cache->getMaxFileSegmentSize(), create_settings);
chassert(holder->size() == 1);
holder->moveTo(file_segments);
return file_segments.back();
}
void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_segment)
@ -159,7 +156,7 @@ void FileSegmentRangeWriter::appendFilesystemCacheLog(const FileSegment & file_s
if (cache_log)
{
auto file_segment_range = file_segment.range();
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize() - 1;
size_t file_segment_right_bound = file_segment_range.left + file_segment.getDownloadedSize(false) - 1;
FilesystemCacheLogElement elem
{
@ -185,7 +182,7 @@ void FileSegmentRangeWriter::completeFileSegment(FileSegment & file_segment)
if (file_segment.isDetached() || file_segment.isCompleted())
return;
file_segment.completeWithoutState();
file_segment.complete();
appendFilesystemCacheLog(file_segment);
}
@ -224,7 +221,7 @@ void CachedOnDiskWriteBufferFromFile::nextImpl()
{
/// If something was already written to cache, remove it.
cache_writer.reset();
cache->removeIfExists(key);
cache->removeKeyIfExists(key);
throw;
}

View File

@ -39,7 +39,7 @@ public:
~FileSegmentRangeWriter();
private:
FileSegmentPtr & allocateFileSegment(size_t offset, FileSegmentKind segment_kind);
FileSegment & allocateFileSegment(size_t offset, FileSegmentKind segment_kind);
void appendFilesystemCacheLog(const FileSegment & file_segment);
@ -53,7 +53,7 @@ private:
String query_id;
String source_path;
FileSegmentsHolder file_segments_holder{};
FileSegmentsHolder file_segments{};
size_t expected_write_offset = 0;

View File

@ -3,6 +3,7 @@
#include <IO/SeekableReadBuffer.h>
#include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
#include <Disks/ObjectStorages/Cached/CachedObjectStorage.h>
#include <Common/logger_useful.h>
#include <iostream>
#include <base/hex.h>
@ -56,7 +57,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c
if (with_cache)
{
auto cache_key = settings.remote_fs_cache->hash(object_path);
auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path);
return std::make_shared<CachedOnDiskReadBufferFromFile>(
object_path,
cache_key,

View File

@ -43,13 +43,7 @@ DataSourceDescription CachedObjectStorage::getDataSourceDescription() const
FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const
{
return cache->hash(path);
}
String CachedObjectStorage::getCachePath(const std::string & path) const
{
FileCache::Key cache_key = getCacheKey(path);
return cache->getPathInLocalCache(cache_key);
return cache->createKeyForPath(path);
}
std::string CachedObjectStorage::generateBlobNameForPath(const std::string & path)
@ -62,7 +56,7 @@ ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settin
ReadSettings modified_settings{read_settings};
modified_settings.remote_fs_cache = cache;
if (FileCache::isReadOnly())
if (!canUseReadThroughCache())
modified_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
return object_storage->patchSettings(modified_settings);
@ -122,7 +116,6 @@ std::unique_ptr<WriteBufferFromFileBase> CachedObjectStorage::writeObject( /// N
if (cache_on_write)
{
auto key = getCacheKey(path_key_for_cache);
LOG_TEST(log, "Caching file `{}` to `{}` with key {}", object.absolute_path, getCachePath(path_key_for_cache), key.toString());
return std::make_unique<CachedOnDiskWriteBufferFromFile>(
std::move(implementation_buffer),
@ -143,7 +136,7 @@ void CachedObjectStorage::removeCacheIfExists(const std::string & path_key_for_c
return;
/// Add try catch?
cache->removeIfExists(getCacheKey(path_key_for_cache));
cache->removeKeyIfExists(getCacheKey(path_key_for_cache));
}
void CachedObjectStorage::removeObject(const StoredObject & object)
@ -238,4 +231,11 @@ String CachedObjectStorage::getObjectsNamespace() const
return object_storage->getObjectsNamespace();
}
/// Tells whether the calling thread may use the read-through cache.
/// All three conditions must hold, checked in this order:
///   1. CurrentThread is initialized for this thread;
///   2. a query context is attached to the current thread;
///   3. the current query id is not empty.
bool CachedObjectStorage::canUseReadThroughCache()
{
    /// CurrentThread::get() is only reached after the initialization check passes.
    if (!CurrentThread::isInitialized())
        return false;

    if (!CurrentThread::get().getQueryContext())
        return false;

    const bool has_query_id = !CurrentThread::getQueryId().empty();
    return has_query_id;
}
}

View File

@ -113,11 +113,11 @@ public:
WriteSettings getAdjustedSettingsFromMetadataFile(const WriteSettings & settings, const std::string & path) const override;
static bool canUseReadThroughCache();
private:
FileCache::Key getCacheKey(const std::string & path) const;
String getCachePath(const std::string & path) const;
ReadSettings patchSettings(const ReadSettings & read_settings) const override;
ObjectStoragePtr object_storage;

View File

@ -15,6 +15,7 @@
#include <Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h>
#include <Disks/ObjectStorages/DiskObjectStorageTransaction.h>
#include <Disks/FakeDiskTransaction.h>
#include <Common/ThreadPool.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Interpreters/Context.h>

View File

@ -128,7 +128,7 @@ std::unique_ptr<S3::Client> getClient(
if (uri.key.back() != '/')
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100);
client_configuration.endpointOverride = uri.endpoint;

View File

@ -205,6 +205,10 @@ public:
{
return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments);
}
else if (isMap(arguments.at(0).type))
{
return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments);
}
else
return std::make_unique<FunctionToFunctionBaseAdaptor>(
FunctionConcat::create(context), collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }), return_type);

View File

@ -203,11 +203,13 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
return offset_;
if (impl && restricted_seek)
{
throw Exception(
ErrorCodes::CANNOT_SEEK_THROUGH_FILE,
"Seek is allowed only before first read attempt from the buffer (current offset: "
"{}, new offset: {}, reading until position: {}, available: {})",
getPosition(), offset_, read_until_position, available());
ErrorCodes::CANNOT_SEEK_THROUGH_FILE,
"Seek is allowed only before first read attempt from the buffer (current offset: "
"{}, new offset: {}, reading until position: {}, available: {})",
getPosition(), offset_, read_until_position, available());
}
if (whence != SEEK_SET)
throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed.");

File diff suppressed because it is too large Load Diff

View File

@ -9,43 +9,52 @@
#include <unordered_map>
#include <unordered_set>
#include <boost/functional/hash.hpp>
#include <boost/noncopyable.hpp>
#include <Core/Types.h>
#include <Common/ThreadPool.h>
#include <IO/ReadSettings.h>
#include <Interpreters/Cache/IFileCachePriority.h>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Core/BackgroundSchedulePool.h>
#include <Interpreters/Cache/LRUFileCachePriority.h>
#include <Interpreters/Cache/FileCache_fwd.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Interpreters/Cache/Metadata.h>
#include <Interpreters/Cache/QueryLimit.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
#include <filesystem>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
/// Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments.
/// Different caching algorithms are implemented using IFileCachePriority.
class FileCache : private boost::noncopyable
{
friend class FileSegment;
friend class IFileCachePriority;
friend struct FileSegmentsHolder;
friend class FileSegmentRangeWriter;
struct QueryContext;
using QueryContextPtr = std::shared_ptr<QueryContext>;
public:
using Key = DB::FileCacheKey;
using QueryLimit = DB::FileCacheQueryLimit;
using Priority = IFileCachePriority;
using PriorityEntry = IFileCachePriority::Entry;
using PriorityIterator = IFileCachePriority::Iterator;
using PriorityIterationResult = IFileCachePriority::IterationResult;
explicit FileCache(const FileCacheSettings & settings);
~FileCache() = default;
~FileCache();
void initialize();
const String & getBasePath() const { return cache_base_path; }
const String & getBasePath() const;
static Key createKeyForPath(const String & path);
String getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const;
String getPathInLocalCache(const Key & key) const;
/**
* Given an `offset` and `size` representing [offset, offset + size) bytes interval,
@ -58,8 +67,7 @@ public:
* As long as pointers to returned file segments are held
* it is guaranteed that these file segments are not removed from cache.
*/
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
FileSegmentsHolder set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
FileSegmentsHolderPtr getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
/**
* Segments in returned list are ordered in ascending order and represent a full contiguous
@ -70,53 +78,40 @@ public:
* with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change
* their state (and become DOWNLOADED).
*/
FileSegmentsHolder get(const Key & key, size_t offset, size_t size);
FileSegmentsHolderPtr get(const Key & key, size_t offset, size_t size);
FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
/// Remove files by `key`. Removes files which might be used at the moment.
void removeIfExists(const Key & key);
void removeKeyIfExists(const Key & key);
/// Remove all cached files, except those which are used at the moment.
void removeIfReleasable();
static Key hash(const String & path);
String getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const;
String getPathInLocalCache(const Key & key) const;
void removeAllReleasable();
std::vector<String> tryGetCachePaths(const Key & key);
size_t capacity() const { return max_size; }
size_t getUsedCacheSize() const;
size_t getFileSegmentsNum() const;
static bool isReadOnly();
size_t getMaxFileSegmentSize() const { return max_file_segment_size; }
/**
* Create a file segment of exactly requested size with EMPTY state.
* Throw exception if requested size exceeds max allowed file segment size.
* This method is for protected usage: file segment range writer uses it
* to dynamically allocate file segments.
*/
FileSegmentPtr createFileSegmentForDownload(
const Key & key,
size_t offset,
size_t size,
const CreateFileSegmentSettings & create_settings,
std::lock_guard<std::mutex> & cache_lock);
bool tryReserve(FileSegment & file_segment, size_t size);
FileSegments getSnapshot() const;
FileSegmentsHolderPtr getSnapshot();
/// For debug.
String dumpStructure(const Key & key);
FileSegmentsHolderPtr getSnapshot(const Key & key);
/// Save a query context information, and adopt different cache policies
/// for different queries through the context cache layer.
FileSegmentsHolderPtr dumpQueue();
void cleanup();
void deactivateBackgroundOperations();
/// For per query cache limit.
struct QueryContextHolder : private boost::noncopyable
{
QueryContextHolder(const String & query_id_, FileCache * cache_, QueryContextPtr context_);
QueryContextHolder(const String & query_id_, FileCache * cache_, QueryLimit::QueryContextPtr context_);
QueryContextHolder() = default;
@ -124,198 +119,95 @@ public:
String query_id;
FileCache * cache = nullptr;
QueryContextPtr context;
QueryLimit::QueryContextPtr context;
};
using QueryContextHolderPtr = std::unique_ptr<QueryContextHolder>;
QueryContextHolderPtr getQueryContextHolder(const String & query_id, const ReadSettings & settings);
QueryContextHolder getQueryContextHolder(const String & query_id, const ReadSettings & settings);
CacheGuard::Lock lockCache() { return cache_guard.lock(); }
private:
String cache_base_path;
using KeyAndOffset = FileCacheKeyAndOffset;
const size_t max_size;
const size_t max_element_size;
const size_t max_file_segment_size;
const bool allow_persistent_files;
const size_t enable_cache_hits_threshold;
const bool enable_filesystem_query_cache_limit;
const size_t bypass_cache_threshold = 0;
const size_t delayed_cleanup_interval_ms;
const bool enable_bypass_cache_with_threashold;
const size_t bypass_cache_threashold;
mutable std::mutex mutex;
Poco::Logger * log;
bool is_initialized = false;
std::exception_ptr initialization_exception;
std::exception_ptr init_exception;
std::atomic<bool> is_initialized = false;
mutable std::mutex init_mutex;
void assertInitialized(std::lock_guard<std::mutex> & cache_lock) const;
CacheMetadata metadata;
bool tryReserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
FileCachePriorityPtr main_priority;
mutable CacheGuard cache_guard;
void remove(
Key key,
size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::unique_lock<std::mutex> & segment_lock);
void remove(
FileSegmentPtr file_segment,
std::lock_guard<std::mutex> & cache_lock);
bool isLastFileSegmentHolder(
const Key & key,
size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::unique_lock<std::mutex> & segment_lock);
void reduceSizeToDownloaded(
const Key & key,
size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::unique_lock<std::mutex> & segment_lock);
struct FileSegmentCell : private boost::noncopyable
struct HitsCountStash
{
FileSegmentPtr file_segment;
/// Iterator is put here on first reservation attempt, if successful.
IFileCachePriority::WriteIterator queue_iterator;
/// Pointer to file segment is always hold by the cache itself.
/// Apart from pointer in cache, it can be hold by cache users, when they call
/// getorSet(), but cache users always hold it via FileSegmentsHolder.
bool releasable() const { return file_segment.unique(); }
size_t size() const { return file_segment->reserved_size; }
FileSegmentCell(FileSegmentPtr file_segment_, FileCache * cache, std::lock_guard<std::mutex> & cache_lock);
FileSegmentCell(FileSegmentCell && other) noexcept
: file_segment(std::move(other.file_segment)), queue_iterator(std::move(other.queue_iterator)) {}
};
using AccessKeyAndOffset = std::pair<Key, size_t>;
struct KeyAndOffsetHash
{
std::size_t operator()(const AccessKeyAndOffset & key) const
HitsCountStash(size_t hits_threashold_, size_t queue_size_)
: hits_threshold(hits_threashold_), queue(std::make_unique<LRUFileCachePriority>(0, queue_size_))
{
return std::hash<UInt128>()(key.first.key) ^ std::hash<UInt64>()(key.second);
if (!queue_size_)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Queue size for hits queue must be non-zero");
}
const size_t hits_threshold;
FileCachePriorityPtr queue;
using Records = std::unordered_map<KeyAndOffset, PriorityIterator, FileCacheKeyAndOffsetHash>;
Records records;
};
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
using CachedFiles = std::unordered_map<Key, FileSegmentsByOffset>;
using FileCacheRecords = std::unordered_map<AccessKeyAndOffset, IFileCachePriority::WriteIterator, KeyAndOffsetHash>;
/**
* A HitsCountStash allows to cache certain data only after it reached
* a certain hit rate, e.g. if hit rate is 5, then data is cached on 6th cache hit.
*/
mutable std::unique_ptr<HitsCountStash> stash;
/**
* A QueryLimit allows to control cache write limit per query.
* E.g. if a query needs n bytes from cache, but it has only k bytes, where 0 <= k <= n
* then allowed loaded cache size is std::min(n - k, max_query_cache_size).
*/
FileCacheQueryLimitPtr query_limit;
/**
* A background cleanup task.
* Clears removed cache entries from metadata.
*/
BackgroundSchedulePool::TaskHolder cleanup_task;
CachedFiles files;
std::unique_ptr<IFileCachePriority> main_priority;
void assertInitialized() const;
FileCacheRecords stash_records;
std::unique_ptr<IFileCachePriority> stash_priority;
size_t max_stash_element_size;
void assertCacheCorrectness();
void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
void loadMetadata();
FileSegments getImpl(const Key & key, const FileSegment::Range & range, std::lock_guard<std::mutex> & cache_lock);
FileSegments getImpl(const LockedKey & locked_key, const FileSegment::Range & range) const;
FileSegmentCell * getCell(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock);
/// Returns non-owned pointer to the cell stored in the `files` map.
/// Doesn't reserve any space.
FileSegmentCell * addCell(
const Key & key,
FileSegments splitRangeIntoFileSegments(
LockedKey & locked_key,
size_t offset,
size_t size,
FileSegment::State state,
const CreateFileSegmentSettings & create_settings,
std::lock_guard<std::mutex> & cache_lock);
static void useCell(const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock);
bool tryReserveForMainList(
const Key & key,
size_t offset,
size_t size,
QueryContextPtr query_context,
std::lock_guard<std::mutex> & cache_lock);
FileSegments splitRangeIntoCells(
const Key & key,
size_t offset,
size_t size,
FileSegment::State state,
const CreateFileSegmentSettings & create_settings,
std::lock_guard<std::mutex> & cache_lock);
String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
const CreateFileSegmentSettings & create_settings);
void fillHolesWithEmptyFileSegments(
LockedKey & locked_key,
FileSegments & file_segments,
const Key & key,
const FileSegment::Range & range,
bool fill_with_detached_file_segments,
const CreateFileSegmentSettings & settings,
std::lock_guard<std::mutex> & cache_lock);
const CreateFileSegmentSettings & settings);
size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
KeyMetadata::iterator addFileSegment(
LockedKey & locked_key,
size_t offset,
size_t size,
FileSegment::State state,
const CreateFileSegmentSettings & create_settings,
const CacheGuard::Lock *);
size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
void removeKeyDirectoryIfExists(const Key & key, std::lock_guard<std::mutex> & cache_lock) const;
/// Used to track and control the cache access of each query.
/// Through it, we can realize the processing of different queries by the cache layer.
struct QueryContext
{
/// Per-query records; the FileCacheRecords type is declared outside this view.
FileCacheRecords records;
/// Priority structure associated with this query (returned by getPriority()).
FileCachePriorityPtr priority;
/// Current size accounted to this query; starts at 0.
size_t cache_size = 0;
/// Upper bound for this query, set once in the constructor.
size_t max_cache_size;
/// Flag set once in the constructor and exposed through isSkipDownloadIfExceed().
bool skip_download_if_exceeds_query_cache;
/// Stores the limit and the skip flag; `records` and `priority` are left default-initialized here.
QueryContext(size_t max_cache_size_, bool skip_download_if_exceeds_query_cache_)
: max_cache_size(max_cache_size_)
, skip_download_if_exceeds_query_cache(skip_download_if_exceeds_query_cache_) {}
/// Plain accessors for the fields above.
size_t getMaxCacheSize() const { return max_cache_size; }
size_t getCacheSize() const { return cache_size; }
FileCachePriorityPtr getPriority() const { return priority; }
bool isSkipDownloadIfExceed() const { return skip_download_if_exceeds_query_cache; }
/// The three methods below are only declared here; each takes a reference to a held
/// std::lock_guard<std::mutex> named cache_lock.
/// NOTE(review): presumably they update `records`, `priority` and `cache_size` — confirm in the definitions.
void remove(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
void reserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock);
void use(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock);
};
using QueryContextMap = std::unordered_map<String, QueryContextPtr>;
QueryContextMap query_map;
QueryContextPtr getCurrentQueryContext(std::lock_guard<std::mutex> & cache_lock);
QueryContextPtr getQueryContext(const String & query_id, std::lock_guard<std::mutex> & cache_lock);
void removeQueryContext(const String & query_id);
QueryContextPtr getOrSetQueryContext(const String & query_id, const ReadSettings & settings, std::lock_guard<std::mutex> &);
public:
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
void assertPriorityCorrectness(std::lock_guard<std::mutex> & cache_lock);
void cleanupThreadFunc();
};
}

View File

@ -0,0 +1,31 @@
#include "FileCacheKey.h"
#include <base/hex.h>
#include <Common/SipHash.h>
#include <Core/UUID.h>
namespace DB
{
FileCacheKey::FileCacheKey(const std::string & path)
: key(sipHash128(path.data(), path.size()))
{
}
FileCacheKey::FileCacheKey(const UInt128 & key_)
: key(key_)
{
}
std::string FileCacheKey::toString() const
{
return getHexUIntLowercase(key);
}
FileCacheKey FileCacheKey::random()
{
return FileCacheKey(UUIDHelpers::generateV4().toUnderType());
}
}

View File

@ -1,26 +1,37 @@
#pragma once
#include <Core/Types.h>
#include <base/hex.h>
#include <Core/UUID.h>
#include <fmt/format.h>
namespace DB
{
struct FileCacheKey
{
UInt128 key;
using KeyHash = UInt128;
KeyHash key;
String toString() const { return getHexUIntLowercase(key); }
std::string toString() const;
FileCacheKey() = default;
explicit FileCacheKey(const UInt128 & key_) : key(key_) { }
explicit FileCacheKey(const std::string & path);
static FileCacheKey random() { return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); }
explicit FileCacheKey(const UInt128 & key_);
static FileCacheKey random();
bool operator==(const FileCacheKey & other) const { return key == other.key; }
};
using FileCacheKeyAndOffset = std::pair<FileCacheKey, size_t>;
/// Hash functor for FileCacheKeyAndOffset pairs: hashes the key and the offset
/// separately with std::hash and combines the two results with XOR.
struct FileCacheKeyAndOffsetHash
{
    std::size_t operator()(const FileCacheKeyAndOffset & key) const
    {
        const auto key_hash = std::hash<UInt128>()(key.first.key);
        const auto offset_hash = std::hash<UInt64>()(key.second);
        return key_hash ^ offset_hash;
    }
};
}
namespace std
@ -32,3 +43,13 @@ struct hash<DB::FileCacheKey>
};
}
/// fmt support for DB::FileCacheKey: the key is converted with toString() and then
/// formatted by the std::string formatter this specialization derives from.
template <>
struct fmt::formatter<DB::FileCacheKey> : fmt::formatter<std::string>
{
    template <typename FormatCtx>
    auto format(const DB::FileCacheKey & key, FormatCtx & ctx) const
    {
        const std::string text = key.toString();
        return fmt::formatter<std::string>::format(text, ctx);
    }
};

View File

@ -30,24 +30,26 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &
if (path.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk Cache requires non-empty `path` field (cache base path) in config");
max_elements = config.getUInt64(config_prefix + ".max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS);
max_elements = config.getUInt64(config_prefix + ".max_elements", FILECACHE_DEFAULT_MAX_ELEMENTS);
if (config.has(config_prefix + ".max_file_segment_size"))
max_file_segment_size = parseWithSizeSuffix<uint64_t>(config.getString(config_prefix + ".max_file_segment_size"));
else
max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
max_file_segment_size = FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false);
enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false);
enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD);
cache_hits_threshold = config.getUInt64(config_prefix + ".cache_hits_threshold", FILECACHE_DEFAULT_HITS_THRESHOLD);
enable_bypass_cache_with_threashold = config.getUInt64(config_prefix + ".enable_bypass_cache_with_threashold", false);
if (config.has(config_prefix + ".bypass_cache_threashold"))
bypass_cache_threashold = parseWithSizeSuffix<uint64_t>(config.getString(config_prefix + ".bypass_cache_threashold"));
else
bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD;
bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD;
do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", false);
delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS);
}
}

View File

@ -13,18 +13,19 @@ struct FileCacheSettings
std::string base_path;
size_t max_size = 0;
size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS;
size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
size_t max_elements = FILECACHE_DEFAULT_MAX_ELEMENTS;
size_t max_file_segment_size = FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
bool cache_on_write_operations = false;
size_t enable_cache_hits_threshold = REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD;
size_t cache_hits_threshold = FILECACHE_DEFAULT_HITS_THRESHOLD;
bool enable_filesystem_query_cache_limit = false;
bool do_not_evict_index_and_mark_files = true;
bool enable_bypass_cache_with_threashold = false;
size_t bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD;
size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD;
size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS;
void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
};

View File

@ -4,10 +4,11 @@
namespace DB
{
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
static constexpr int REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD = 0;
static constexpr size_t REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;;
static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0;
static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;
static constexpr size_t FILECACHE_DELAYED_CLEANUP_INTERVAL_MS = 1000 * 60; /// 1 min
class FileCache;
using FileCachePtr = std::shared_ptr<FileCache>;

View File

@ -0,0 +1,26 @@
#pragma once
#include <list>
#include <memory> /// std::shared_ptr / std::unique_ptr are used by the aliases below.

/// Forward declarations and smart-pointer aliases for the file cache internals.
/// Only incomplete types are declared here, so this header can be included without
/// pulling in the full class definitions.
namespace DB
{

class FileCache;
using FileCachePtr = std::shared_ptr<FileCache>;

class IFileCachePriority;
using FileCachePriorityPtr = std::unique_ptr<IFileCachePriority>;

class FileSegment;
using FileSegmentPtr = std::shared_ptr<FileSegment>;
using FileSegments = std::list<FileSegmentPtr>;

struct FileSegmentMetadata;
using FileSegmentMetadataPtr = std::shared_ptr<FileSegmentMetadata>;

struct LockedKey;
using LockedKeyPtr = std::shared_ptr<LockedKey>;

struct KeyMetadata;
using KeyMetadataPtr = std::shared_ptr<KeyMetadata>;

}

File diff suppressed because it is too large Load Diff

View File

@ -2,13 +2,16 @@
#include <boost/noncopyable.hpp>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Interpreters/Cache/Guards.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFileBase.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <IO/OpenedFileCache.h>
#include <base/getThreadId.h>
#include <list>
#include <Interpreters/Cache/IFileCachePriority.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
#include <queue>
@ -22,14 +25,8 @@ extern const Metric CacheFileSegments;
namespace DB
{
class FileCache;
class ReadBufferFromFileBase;
class FileSegment;
using FileSegmentPtr = std::shared_ptr<FileSegment>;
using FileSegments = std::list<FileSegmentPtr>;
/*
* FileSegmentKind is used to specify the eviction policy for file segments.
*/
@ -61,17 +58,13 @@ struct CreateFileSegmentSettings
CreateFileSegmentSettings() = default;
explicit CreateFileSegmentSettings(FileSegmentKind kind_, bool unbounded_ = false)
: kind(kind_), unbounded(unbounded_)
{}
: kind(kind_), unbounded(unbounded_) {}
};
class FileSegment : private boost::noncopyable, public std::enable_shared_from_this<FileSegment>
{
friend class FileCache;
friend struct FileSegmentsHolder;
friend class FileSegmentRangeWriter;
friend class StorageSystemFilesystemCache;
friend struct LockedKey;
friend class FileCache; /// Because of reserved_size in tryReserve().
public:
using Key = FileCacheKey;
@ -79,6 +72,7 @@ public:
using LocalCacheWriterPtr = std::unique_ptr<WriteBufferFromFile>;
using Downloader = std::string;
using DownloaderId = std::string;
using Priority = IFileCachePriority;
enum class State
{
@ -111,18 +105,20 @@ public:
* If file segment cannot possibly be downloaded (first space reservation attempt failed), mark
* this file segment as out of cache scope.
*/
SKIP_CACHE,
DETACHED,
};
FileSegment(
const Key & key_,
size_t offset_,
size_t size_,
const Key & key_,
FileCache * cache_,
State download_state_,
const CreateFileSegmentSettings & create_settings);
const CreateFileSegmentSettings & create_settings = {},
FileCache * cache_ = nullptr,
std::weak_ptr<KeyMetadata> key_metadata_ = std::weak_ptr<KeyMetadata>(),
Priority::Iterator queue_iterator_ = Priority::Iterator{});
~FileSegment();
~FileSegment() = default;
State state() const;
@ -158,11 +154,10 @@ public:
size_t offset() const { return range().left; }
FileSegmentKind getKind() const { return segment_kind; }
bool isPersistent() const { return segment_kind == FileSegmentKind::Persistent; }
bool isUnbound() const { return is_unbound; }
using UniqueId = std::pair<FileCacheKey, size_t>;
UniqueId getUniqueId() const { return std::pair(key(), offset()); }
bool isPersistent() const { return segment_kind == FileSegmentKind::Persistent; }
bool isUnbound() const { return is_unbound; }
String getPathInLocalCache() const;
@ -177,7 +172,7 @@ public:
DownloaderId getDownloader() const;
/// Wait for the change of state from DOWNLOADING to any other.
State wait();
State wait(size_t offset);
bool isDownloaded() const;
@ -187,11 +182,13 @@ public:
void incrementHitsCount() { ++hits_count; }
size_t getCurrentWriteOffset() const;
size_t getCurrentWriteOffset(bool sync) const;
size_t getFirstNonDownloadedOffset() const;
size_t getFirstNonDownloadedOffset(bool sync) const;
size_t getDownloadedSize() const;
size_t getDownloadedSize(bool sync) const;
size_t getReservedSize() const;
/// Now detached status can be used in the following cases:
/// 1. there is only 1 remaining file segment holder
@ -207,15 +204,43 @@ public:
/// 2. Detached file segment can still be held by some cache users, but its state became
/// immutable at the point it was detached, any non-const / stateful method will throw an
/// exception.
void detach(std::lock_guard<std::mutex> & cache_lock, std::unique_lock<std::mutex> & segment_lock);
void detach(const FileSegmentGuard::Lock &, const LockedKey &);
static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & cache_lock);
static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment);
bool isDetached() const;
bool isCompleted() const;
/// File segment has a completed state, if this state is final and
/// is not going to be changed. Completed states: DOWNLOADED, DETACHED.
bool isCompleted(bool sync = false) const;
void assertCorrectness() const;
void use();
/**
* ========== Methods used by `cache` ========================
*/
FileSegmentGuard::Lock lock() const { return segment_guard.lock(); }
Priority::Iterator getQueueIterator() const;
void setQueueIterator(Priority::Iterator iterator);
KeyMetadataPtr tryGetKeyMetadata() const;
KeyMetadataPtr getKeyMetadata() const;
bool assertCorrectness() const;
/**
* ========== Methods that must do cv.notify() ==================
*/
void complete();
void completePartAndResetDownloader();
void resetDownloader();
/**
* ========== Methods for _only_ file segment's `downloader` ==================
@ -233,16 +258,6 @@ public:
/// Write data into reserved space.
void write(const char * from, size_t size, size_t offset);
/// Complete file segment with a certain state.
void completeWithState(State state);
void completeWithoutState();
/// Complete file segment's part which was last written.
void completePartAndResetDownloader();
void resetDownloader();
// Invariant: if state() != DOWNLOADING and remote file reader is present, the reader's
// available() == 0, and getFileOffsetOfBufferEnd() == our getCurrentWriteOffset().
//
@ -252,125 +267,112 @@ public:
RemoteFileReaderPtr extractRemoteFileReader();
void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);
void resetRemoteFileReader();
void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);
void setDownloadedSize(size_t delta);
LocalCacheWriterPtr detachWriter();
private:
size_t getFirstNonDownloadedOffsetUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
size_t getCurrentWriteOffsetUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
size_t getDownloadedSizeUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
String getDownloaderUnlocked(const FileSegmentGuard::Lock &) const;
bool isDownloaderUnlocked(const FileSegmentGuard::Lock & segment_lock) const;
void resetDownloaderUnlocked(const FileSegmentGuard::Lock &);
String getInfoForLogUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void setDownloadState(State state, const FileSegmentGuard::Lock &);
void resetDownloadingStateUnlocked(const FileSegmentGuard::Lock &);
void setDetachedState(const FileSegmentGuard::Lock &);
String getDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void resetDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock);
void resetDownloadingStateUnlocked(std::unique_lock<std::mutex> & segment_lock);
String getInfoForLogUnlocked(const FileSegmentGuard::Lock &) const;
void setDownloadState(State state);
void setDownloadedUnlocked(const FileSegmentGuard::Lock &);
void setDownloadFailedUnlocked(const FileSegmentGuard::Lock &);
void setDownloadedUnlocked(std::unique_lock<std::mutex> & segment_lock);
void setDownloadFailedUnlocked(std::unique_lock<std::mutex> & segment_lock);
void setDownloadedSizeUnlocked(std::unique_lock<std::mutex> & /* download_lock */, size_t delta);
bool hasFinalizedStateUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
bool isDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
bool isDetached(std::unique_lock<std::mutex> & /* segment_lock */) const { return is_detached; }
void detachAssumeStateFinalized(std::unique_lock<std::mutex> & segment_lock);
[[noreturn]] void throwIfDetachedUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void assertDetachedStatus(std::unique_lock<std::mutex> & segment_lock) const;
void assertNotDetached() const;
void assertNotDetachedUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void assertIsDownloaderUnlocked(const std::string & operation, std::unique_lock<std::mutex> & segment_lock) const;
void assertCorrectnessUnlocked(std::unique_lock<std::mutex> & segment_lock) const;
void assertNotDetachedUnlocked(const FileSegmentGuard::Lock &) const;
void assertIsDownloaderUnlocked(const std::string & operation, const FileSegmentGuard::Lock &) const;
bool assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) const;
/// completeWithoutStateUnlocked() is called from destructor of FileSegmentsHolder.
/// Function might check if the caller of the method
/// is the last alive holder of the segment. Therefore, completion and destruction
/// of the file segment pointer must be done under the same cache mutex.
void completeWithoutStateUnlocked(std::lock_guard<std::mutex> & cache_lock);
void completeBasedOnCurrentState(std::lock_guard<std::mutex> & cache_lock, std::unique_lock<std::mutex> & segment_lock);
void completePartAndResetDownloaderUnlocked(std::unique_lock<std::mutex> & segment_lock);
void wrapWithCacheInfo(Exception & e, const String & message, std::unique_lock<std::mutex> & segment_lock) const;
LockedKeyPtr lockKeyMetadata(bool assert_exists = true) const;
Key file_key;
Range segment_range;
const FileSegmentKind segment_kind;
/// Size of the segment is not known until it is downloaded and
/// can be bigger than max_file_segment_size.
const bool is_unbound = false;
State download_state;
/// The one who prepares the download
DownloaderId downloader_id;
std::atomic<State> download_state;
DownloaderId downloader_id; /// The one who prepares the download
RemoteFileReaderPtr remote_file_reader;
LocalCacheWriterPtr cache_writer;
bool detached_writer = false;
/// downloaded_size should always be less or equal to reserved_size
size_t downloaded_size = 0;
size_t reserved_size = 0;
/// global locking order rule:
/// 1. cache lock
/// 2. segment lock
mutable std::mutex mutex;
std::condition_variable cv;
/// Protects downloaded_size access with actual write into fs.
/// downloaded_size is not protected by download_mutex in methods which
/// can never be run in parallel to FileSegment::write() method
/// as downloaded_size is updated only in FileSegment::write() method.
/// Such methods are identified by isDownloader() check at their start,
/// e.g. they are executed strictly by the same thread, sequentially.
std::atomic<size_t> downloaded_size = 0;
std::atomic<size_t> reserved_size = 0;
mutable std::mutex download_mutex;
Key file_key;
mutable FileSegmentGuard segment_guard;
std::weak_ptr<KeyMetadata> key_metadata;
mutable Priority::Iterator queue_iterator; /// Iterator is put here on first reservation attempt, if successful.
FileCache * cache;
std::condition_variable cv;
Poco::Logger * log;
/// "detached" file segment means that it is not owned by cache ("detached" from cache).
/// In general case, all file segments are owned by cache.
bool is_detached = false;
bool is_completed = false;
bool is_downloaded = false;
std::atomic<size_t> hits_count = 0; /// cache hits.
std::atomic<size_t> ref_count = 0; /// Used for getting snapshot state
FileSegmentKind segment_kind;
/// Size of the segment is not known until it is downloaded and can be bigger than max_file_segment_size.
bool is_unbound = false;
CurrentMetrics::Increment metric_increment{CurrentMetrics::CacheFileSegments};
};
struct FileSegmentsHolder : private boost::noncopyable
{
FileSegmentsHolder() = default;
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
FileSegmentsHolder(FileSegmentsHolder && other) noexcept : file_segments(std::move(other.file_segments)) {}
void reset();
bool empty() const { return file_segments.empty(); }
explicit FileSegmentsHolder(FileSegments && file_segments_, bool complete_on_dtor_ = true)
: file_segments(std::move(file_segments_)), complete_on_dtor(complete_on_dtor_) {}
~FileSegmentsHolder();
bool empty() const { return file_segments.empty(); }
size_t size() const { return file_segments.size(); }
String toString();
void popFront() { completeAndPopFrontImpl(); }
FileSegment & front() { return *file_segments.front(); }
FileSegment & back() { return *file_segments.back(); }
/// Appends `file_segment` to the end of the holder and returns a reference to the
/// stored segment.
/// The argument is an rvalue reference, but inside the function it is a named lvalue,
/// so it has to be moved explicitly; otherwise push_back copies the shared_ptr.
FileSegment & add(FileSegmentPtr && file_segment)
{
    file_segments.push_back(std::move(file_segment));
    return *file_segments.back();
}
FileSegments::iterator begin() { return file_segments.begin(); }
FileSegments::iterator end() { return file_segments.end(); }
FileSegments::const_iterator begin() const { return file_segments.begin(); }
FileSegments::const_iterator end() const { return file_segments.end(); }
/// Transfers every file segment of this holder to the end of `holder`, keeping their
/// order, and leaves this holder empty.
/// The list nodes are relinked with splice(), so no shared_ptr is copied.
void moveTo(FileSegmentsHolder & holder)
{
    /// Splicing a list into itself is not allowed; transferring to self is a no-op.
    if (&holder == this)
        return;

    holder.file_segments.splice(holder.file_segments.end(), file_segments);
}
private:
FileSegments file_segments{};
const bool complete_on_dtor = true;
FileSegments::iterator completeAndPopFrontImpl();
};
using FileSegmentsHolderPtr = std::unique_ptr<FileSegmentsHolder>;
}

View File

@ -0,0 +1,117 @@
#pragma once
#include <mutex>
#include <Interpreters/Cache/FileCache_fwd.h>
#include <boost/noncopyable.hpp>
#include <map>
namespace DB
{
/**
* FileCache::get/getOrSet/set
* 1. CacheMetadataGuard::Lock (take key lock and release metadata lock)
* 2. KeyGuard::Lock (hold till the end of the method)
*
* FileCache::tryReserve
* 1. CacheGuard::Lock
* 2. KeyGuard::Lock (taken without metadata lock)
* 3. any number of KeyGuard::Lock's for files which are going to be evicted (taken via metadata lock)
*
* FileCache::removeIfExists
* 1. CacheGuard::Lock
* 2. KeyGuard::Lock (taken via metadata lock)
* 3. FileSegmentGuard::Lock
*
* FileCache::removeAllReleasable
* 1. CacheGuard::Lock
* 2. any number of KeyGuard::Lock's locks (taken via metadata lock), but at a moment of time only one key lock can be held
* 3. FileSegmentGuard::Lock
*
* FileCache::getSnapshot (for all cache)
* 1. metadata lock
* 2. any number of KeyGuard::Lock's locks (taken via metadata lock), but at a moment of time only one key lock can be held
* 3. FileSegmentGuard::Lock
*
* FileCache::getSnapshot(key)
* 1. KeyGuard::Lock (taken via metadata lock)
* 2. FileSegmentGuard::Lock
*
* FileSegment::complete
* 1. CacheGuard::Lock
* 2. KeyGuard::Lock (taken without metadata lock)
* 3. FileSegmentGuard::Lock
*
* Rules:
* 1. Priority of locking: CacheGuard::Lock > CacheMetadataGuard::Lock > KeyGuard::Lock > FileSegmentGuard::Lock
* 2. If we take more than one key lock at a moment of time, we need to take CacheGuard::Lock (example: tryReserve())
*
*
* _CacheGuard_
* 1. FileCache::tryReserve
* 2. FileCache::removeIfExists(key)
* 3. FileCache::removeAllReleasable
* 4. FileSegment::complete
*
* _KeyGuard_ _CacheMetadataGuard_
* 1. all from CacheGuard 1. getOrSet/get/set
* 2. getOrSet/get/Set
*
* *This table does not include locks taken for introspection and system tables.
*/
/**
 * Guard for the cache priority queue.
 * Owns the mutex and hands out its own Lock type, so a function that takes
 * `const CacheGuard::Lock &` cannot be called with a lock of another guard.
 */
struct CacheGuard : private boost::noncopyable
{
    /// std::unique_lock over this guard's mutex; the mutex is locked on construction.
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    std::mutex mutex;

    /// Locks the mutex and returns the owning Lock.
    Lock lock()
    {
        Lock acquired(mutex);
        return acquired;
    }
};
/**
 * Guard for cache metadata.
 * Owns the mutex and hands out its own Lock type, distinct from the Lock types
 * of the other guards in this header.
 */
struct CacheMetadataGuard : private boost::noncopyable
{
    /// std::unique_lock over this guard's mutex; the mutex is locked on construction.
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    std::mutex mutex;

    /// Locks the mutex and returns the owning Lock.
    Lock lock()
    {
        Lock acquired(mutex);
        return acquired;
    }
};
/**
 * Key guard. A separate guard for each cache key.
 * Owns the mutex and hands out its own Lock type, distinct from the Lock types
 * of the other guards in this header.
 */
struct KeyGuard : private boost::noncopyable
{
    /// std::unique_lock over this guard's mutex; the mutex is locked on construction.
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    std::mutex mutex;

    /// Locks the mutex and returns the owning Lock.
    Lock lock()
    {
        Lock acquired(mutex);
        return acquired;
    }
};
/**
 * Guard for a file segment.
 * Owns the mutex and hands out its own Lock type, distinct from the Lock types
 * of the other guards in this header.
 */
struct FileSegmentGuard : private boost::noncopyable
{
    /// std::unique_lock over this guard's mutex; the mutex is locked on construction.
    struct Lock : public std::unique_lock<std::mutex>
    {
        explicit Lock(std::mutex & mutex_) : std::unique_lock<std::mutex>(mutex_) {}
    };

    std::mutex mutex;

    /// Locks the mutex and returns the owning Lock.
    Lock lock()
    {
        Lock acquired(mutex);
        return acquired;
    }
};
}

View File

@ -5,33 +5,35 @@
#include <Core/Types.h>
#include <Common/Exception.h>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Interpreters/Cache/Guards.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
namespace DB
{
class IFileCachePriority;
using FileCachePriorityPtr = std::shared_ptr<IFileCachePriority>;
/// IFileCachePriority is used to maintain the priority of cached data.
class IFileCachePriority
class IFileCachePriority : private boost::noncopyable
{
public:
class IIterator;
using Key = FileCacheKey;
using ReadIterator = std::unique_ptr<const IIterator>;
using WriteIterator = std::shared_ptr<IIterator>;
using KeyAndOffset = FileCacheKeyAndOffset;
struct FileCacheRecord
struct Entry
{
Key key;
size_t offset;
size_t size;
size_t hits = 0;
Entry(const Key & key_, size_t offset_, size_t size_, KeyMetadataPtr key_metadata_)
: key(key_), offset(offset_), size(size_), key_metadata(key_metadata_) {}
FileCacheRecord(const Key & key_, size_t offset_, size_t size_) : key(key_), offset(offset_), size(size_) { }
Entry(const Entry & other)
: key(other.key), offset(other.offset), size(other.size.load()), hits(other.hits), key_metadata(other.key_metadata) {}
const Key key;
const size_t offset;
std::atomic<size_t> size;
size_t hits = 0;
const KeyMetadataPtr key_metadata;
};
/// It provides an iterator to traverse the cache priority. Under normal circumstances,
/// Provides an iterator to traverse the cache priority. Under normal circumstances,
/// the iterator can only return the records that have been directly swapped out.
/// For example, in the LRU algorithm, it can traverse all records, but in the LRU-K, it
/// can only traverse the records in the low priority queue.
@ -40,56 +42,54 @@ public:
public:
virtual ~IIterator() = default;
virtual const Key & key() const = 0;
virtual size_t use(const CacheGuard::Lock &) = 0;
virtual size_t offset() const = 0;
virtual std::shared_ptr<IIterator> remove(const CacheGuard::Lock &) = 0;
virtual size_t size() const = 0;
virtual const Entry & getEntry() const = 0;
virtual size_t hits() const = 0;
virtual Entry & getEntry() = 0;
/// Point the iterator to the next higher priority cache record.
virtual void next() const = 0;
virtual void annul() = 0;
virtual bool valid() const = 0;
/// Mark a cache record as recently used, it will update the priority
/// of the cache record according to different cache algorithms.
virtual void use(std::lock_guard<std::mutex> &) = 0;
/// Deletes an existing cached record. And to avoid pointer suspension
/// the iterator should automatically point to the next record.
virtual void removeAndGetNext(std::lock_guard<std::mutex> &) = 0;
virtual void updateSize(int64_t, std::lock_guard<std::mutex> &) = 0;
virtual void updateSize(int64_t size) = 0;
};
public:
using Iterator = std::shared_ptr<IIterator>;
using ConstIterator = std::shared_ptr<const IIterator>;
enum class IterationResult
{
BREAK,
CONTINUE,
REMOVE_AND_CONTINUE,
};
using IterateFunc = std::function<IterationResult(LockedKey &, FileSegmentMetadataPtr)>;
IFileCachePriority(size_t max_size_, size_t max_elements_) : max_size(max_size_), max_elements(max_elements_) {}
virtual ~IFileCachePriority() = default;
/// Add a cache record that did not exist before, and throw a
/// logical exception if the cache block already exists.
virtual WriteIterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock) = 0;
size_t getElementsLimit() const { return max_elements; }
/// This method is used for assertions in debug mode. So we do not care about complexity here.
/// Query whether a cache record exists. If it exists, return true. If not, return false.
virtual bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) = 0;
size_t getSizeLimit() const { return max_size; }
virtual void removeAll(std::lock_guard<std::mutex> & cache_lock) = 0;
virtual size_t getSize(const CacheGuard::Lock &) const = 0;
/// Returns an iterator pointing to the lowest priority cached record.
/// We can traverse all cached records through the iterator's next().
virtual ReadIterator getLowestPriorityReadIterator(std::lock_guard<std::mutex> & cache_lock) = 0;
virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0;
/// The same as getLowestPriorityReadIterator(), but it is writeable.
virtual WriteIterator getLowestPriorityWriteIterator(std::lock_guard<std::mutex> & cache_lock) = 0;
virtual Iterator add(
KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) = 0;
virtual size_t getElementsNum(std::lock_guard<std::mutex> & cache_lock) const = 0;
virtual void pop(const CacheGuard::Lock &) = 0;
size_t getCacheSize(std::lock_guard<std::mutex> &) const { return cache_size; }
virtual void removeAll(const CacheGuard::Lock &) = 0;
protected:
size_t max_cache_size = 0;
size_t cache_size = 0;
virtual void iterate(IterateFunc && func, const CacheGuard::Lock &) = 0;
private:
const size_t max_size = 0;
const size_t max_elements = 0;
};
};

View File

@ -1,5 +1,7 @@
#include <Interpreters/Cache/LRUFileCachePriority.h>
#include <Interpreters/Cache/FileCache.h>
#include <Common/CurrentMetrics.h>
#include <Common/randomSeed.h>
#include <Common/logger_useful.h>
namespace CurrentMetrics
@ -16,8 +18,13 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
IFileCachePriority::WriteIterator LRUFileCachePriority::add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> &)
IFileCachePriority::Iterator LRUFileCachePriority::add(
KeyMetadataPtr key_metadata,
size_t offset,
size_t size,
const CacheGuard::Lock &)
{
const auto & key = key_metadata->key;
#ifndef NDEBUG
for (const auto & entry : queue)
{
@ -25,40 +32,56 @@ IFileCachePriority::WriteIterator LRUFileCachePriority::add(const Key & key, siz
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Attempt to add duplicate queue entry to queue. (Key: {}, offset: {}, size: {})",
entry.key.toString(), entry.offset, entry.size);
entry.key, entry.offset, entry.size);
}
#endif
auto iter = queue.insert(queue.end(), FileCacheRecord(key, offset, size));
cache_size += size;
const auto & size_limit = getSizeLimit();
if (size_limit && current_size + size > size_limit)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Not enough space to add {}:{} with size {}: current size: {}/{}",
key, offset, size, current_size, getSizeLimit());
}
current_size += size;
auto iter = queue.insert(queue.end(), Entry(key, offset, size, key_metadata));
CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size);
CurrentMetrics::add(CurrentMetrics::FilesystemCacheElements);
LOG_TEST(log, "Added entry into LRU queue, key: {}, offset: {}", key.toString(), offset);
LOG_TEST(log, "Added entry into LRU queue, key: {}, offset: {}", key, offset);
return std::make_shared<LRUFileCacheIterator>(this, iter);
}
bool LRUFileCachePriority::contains(const Key & key, size_t offset, std::lock_guard<std::mutex> &)
void LRUFileCachePriority::removeAll(const CacheGuard::Lock &)
{
for (const auto & record : queue)
{
if (key == record.key && offset == record.offset)
return true;
}
return false;
}
void LRUFileCachePriority::removeAll(std::lock_guard<std::mutex> &)
{
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, cache_size);
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, current_size);
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements, queue.size());
LOG_TEST(log, "Removed all entries from LRU queue");
queue.clear();
cache_size = 0;
current_size = 0;
}
/// Removes the entry at the front of the queue by delegating to remove().
/// The CacheGuard::Lock parameter is unnamed and unused in the body.
/// NOTE(review): there is no emptiness check here; on an empty queue this would pass
/// queue.begin() == queue.end() to remove() — confirm callers only call it on a non-empty queue.
void LRUFileCachePriority::pop(const CacheGuard::Lock &)
{
remove(queue.begin());
}
/// Erases the entry `it` from the LRU queue and updates the accounting for it:
/// current_size and the FilesystemCacheSize metric are reduced by the entry's size,
/// and FilesystemCacheElements is decremented by one.
/// Returns the iterator produced by queue.erase(), i.e. the element following the erased one.
LRUFileCachePriority::LRUQueueIterator LRUFileCachePriority::remove(LRUQueueIterator it)
{
current_size -= it->size;
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, it->size);
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements);
/// Log before erase(): `it` is invalidated by the erase below.
LOG_TEST(log, "Removed entry from LRU queue, key: {}, offset: {}", it->key, it->offset);
return queue.erase(it);
}
LRUFileCachePriority::LRUFileCacheIterator::LRUFileCacheIterator(
@ -67,36 +90,67 @@ LRUFileCachePriority::LRUFileCacheIterator::LRUFileCacheIterator(
{
}
IFileCachePriority::ReadIterator LRUFileCachePriority::getLowestPriorityReadIterator(std::lock_guard<std::mutex> &)
void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock &)
{
return std::make_unique<const LRUFileCacheIterator>(this, queue.begin());
for (auto it = queue.begin(); it != queue.end();)
{
auto locked_key = it->key_metadata->tryLock();
if (!locked_key || it->size == 0)
{
it = remove(it);
continue;
}
auto metadata = locked_key->tryGetByOffset(it->offset);
if (!metadata)
{
it = remove(it);
continue;
}
if (metadata->size() != it->size)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Mismatch of file segment size in file segment metadata and priority queue: {} != {} ({})",
it->size, metadata->size(), metadata->file_segment->getInfoForLog());
}
auto result = func(*locked_key, metadata);
switch (result)
{
case IterationResult::BREAK:
{
return;
}
case IterationResult::CONTINUE:
{
++it;
break;
}
case IterationResult::REMOVE_AND_CONTINUE:
{
it = remove(it);
break;
}
}
}
}
IFileCachePriority::WriteIterator LRUFileCachePriority::getLowestPriorityWriteIterator(std::lock_guard<std::mutex> &)
LRUFileCachePriority::Iterator LRUFileCachePriority::LRUFileCacheIterator::remove(const CacheGuard::Lock &)
{
return std::make_shared<LRUFileCacheIterator>(this, queue.begin());
return std::make_shared<LRUFileCacheIterator>(cache_priority, cache_priority->remove(queue_iter));
}
size_t LRUFileCachePriority::getElementsNum(std::lock_guard<std::mutex> &) const
void LRUFileCachePriority::LRUFileCacheIterator::annul()
{
return queue.size();
cache_priority->current_size -= queue_iter->size;
queue_iter->size = 0;
}
void LRUFileCachePriority::LRUFileCacheIterator::removeAndGetNext(std::lock_guard<std::mutex> &)
void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size)
{
cache_priority->cache_size -= queue_iter->size;
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheSize, queue_iter->size);
CurrentMetrics::sub(CurrentMetrics::FilesystemCacheElements);
LOG_TEST(cache_priority->log, "Removed entry from LRU queue, key: {}, offset: {}", queue_iter->key.toString(), queue_iter->offset);
queue_iter = cache_priority->queue.erase(queue_iter);
}
void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size, std::lock_guard<std::mutex> &)
{
cache_priority->cache_size += size;
cache_priority->current_size += size;
if (size > 0)
CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size);
@ -105,14 +159,14 @@ void LRUFileCachePriority::LRUFileCacheIterator::updateSize(int64_t size, std::l
queue_iter->size += size;
chassert(queue_iter->size > 0);
chassert(cache_priority->cache_size >= 0);
chassert(cache_priority->current_size >= 0);
chassert(queue_iter->size >= 0);
}
void LRUFileCachePriority::LRUFileCacheIterator::use(std::lock_guard<std::mutex> &)
size_t LRUFileCachePriority::LRUFileCacheIterator::use(const CacheGuard::Lock &)
{
queue_iter->hits++;
cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, queue_iter);
return ++queue_iter->hits;
}
};

View File

@ -2,6 +2,8 @@
#include <list>
#include <Interpreters/Cache/IFileCachePriority.h>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -12,51 +14,51 @@ class LRUFileCachePriority : public IFileCachePriority
{
private:
class LRUFileCacheIterator;
using LRUQueue = std::list<FileCacheRecord>;
using LRUQueue = std::list<Entry>;
using LRUQueueIterator = typename LRUQueue::iterator;
public:
LRUFileCachePriority() = default;
LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {}
WriteIterator add(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> &) override;
size_t getSize(const CacheGuard::Lock &) const override { return current_size; }
bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> &) override;
size_t getElementsCount(const CacheGuard::Lock &) const override { return queue.size(); }
void removeAll(std::lock_guard<std::mutex> &) override;
Iterator add(KeyMetadataPtr key_metadata, size_t offset, size_t size, const CacheGuard::Lock &) override;
ReadIterator getLowestPriorityReadIterator(std::lock_guard<std::mutex> &) override;
void pop(const CacheGuard::Lock &) override;
WriteIterator getLowestPriorityWriteIterator(std::lock_guard<std::mutex> &) override;
void removeAll(const CacheGuard::Lock &) override;
size_t getElementsNum(std::lock_guard<std::mutex> &) const override;
void iterate(IterateFunc && func, const CacheGuard::Lock &) override;
private:
LRUQueue queue;
Poco::Logger * log = &Poco::Logger::get("LRUFileCachePriority");
std::atomic<size_t> current_size = 0;
LRUQueueIterator remove(LRUQueueIterator it);
};
class LRUFileCachePriority::LRUFileCacheIterator : public IFileCachePriority::IIterator
{
public:
LRUFileCacheIterator(LRUFileCachePriority * cache_priority_, LRUFileCachePriority::LRUQueueIterator queue_iter_);
LRUFileCacheIterator(
LRUFileCachePriority * cache_priority_,
LRUFileCachePriority::LRUQueueIterator queue_iter_);
void next() const override { queue_iter++; }
const Entry & getEntry() const override { return *queue_iter; }
bool valid() const override { return queue_iter != cache_priority->queue.end(); }
Entry & getEntry() override { return *queue_iter; }
const Key & key() const override { return queue_iter->key; }
size_t use(const CacheGuard::Lock &) override;
size_t offset() const override { return queue_iter->offset; }
Iterator remove(const CacheGuard::Lock &) override;
size_t size() const override { return queue_iter->size; }
void annul() override;
size_t hits() const override { return queue_iter->hits; }
void removeAndGetNext(std::lock_guard<std::mutex> &) override;
void updateSize(int64_t size, std::lock_guard<std::mutex> &) override;
void use(std::lock_guard<std::mutex> &) override;
void updateSize(int64_t size) override;
private:
LRUFileCachePriority * cache_priority;

View File

@ -0,0 +1,468 @@
#include <Interpreters/Cache/Metadata.h>
#include <Interpreters/Cache/FileCache.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Common/logger_useful.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Takes ownership of the file segment and validates its state.
/// Only EMPTY, DOWNLOADING and DOWNLOADED segments are accepted; a DOWNLOADED
/// segment is additionally expected (debug assertion) to have a queue iterator.
FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_)
    : file_segment(std::move(file_segment_))
{
    const auto initial_state = file_segment->state();

    if (initial_state == FileSegment::State::DOWNLOADED)
    {
        chassert(file_segment->getQueueIterator());
        return;
    }

    if (initial_state == FileSegment::State::EMPTY || initial_state == FileSegment::State::DOWNLOADING)
        return;

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "Can create file segment with either EMPTY, DOWNLOADED, DOWNLOADING state, got: {}",
        FileSegment::stateToString(file_segment->state()));
}
/// Returns the reserved size of the underlying file segment.
size_t FileSegmentMetadata::size() const
{
return file_segment->getReservedSize();
}
/// Stores the key, its directory path and a reference to the shared cleanup queue.
/// If the caller states that the key directory was already created,
/// this is verified with a debug assertion.
KeyMetadata::KeyMetadata(
    const Key & key_,
    const std::string & key_path_,
    CleanupQueue & cleanup_queue_,
    bool created_base_directory_)
    : key(key_)
    , key_path(key_path_)
    , cleanup_queue(cleanup_queue_)
    , created_base_directory(created_base_directory_)
{
    chassert(!created_base_directory || fs::exists(key_path));
}
/// Locks the key and returns the locked handle.
/// Throws LOGICAL_ERROR if `tryLock()` fails, i.e. if the key is not in ACTIVE state.
LockedKeyPtr KeyMetadata::lock()
{
    if (auto locked_key = tryLock())
        return locked_key;

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "Cannot lock key {} (state: {})", key, magic_enum::enum_name(key_state));
}
/// Acquires the key lock and returns the locked handle if the key is ACTIVE.
/// Otherwise returns nullptr; the lock is released when the temporary handle is destroyed.
LockedKeyPtr KeyMetadata::tryLock()
{
    auto locked_key = std::make_unique<LockedKey>(shared_from_this());

    /// The state is inspected only after the lock has been taken.
    if (key_state != KeyMetadata::KeyState::ACTIVE)
        return nullptr;

    return locked_key;
}
/// Creates the key directory once. Returns true if the directory was created now or
/// the flag says it was created before; returns false (and resets the flag) if creation failed.
bool KeyMetadata::createBaseDirectory()
{
    /// `exchange` returns the previous flag value: true means there is nothing left to do.
    if (created_base_directory.exchange(true))
        return true;

    try
    {
        fs::create_directories(key_path);
    }
    catch (...)
    {
        /// Avoid errors like
        /// std::__1::__fs::filesystem::filesystem_error: filesystem error: in create_directories: No space left on device
        /// and mark file segment with SKIP_CACHE state
        tryLogCurrentException(__PRETTY_FUNCTION__);
        created_base_directory = false;
        return false;
    }

    return true;
}
/// Builds the path of the file segment inside this key's directory:
/// `key_path` joined with the file name derived from the segment's offset and kind.
std::string KeyMetadata::getFileSegmentPath(const FileSegment & file_segment)
{
    const auto file_name = CacheMetadata::getFileNameForFileSegment(file_segment.offset(), file_segment.getKind());
    return fs::path(key_path) / file_name;
}
/// A set of cache keys submitted for delayed removal.
/// Every method takes the internal mutex for the duration of the call.
class CleanupQueue
{
/// CacheMetadata is the only consumer: it drains the queue through the private `tryPop()`.
friend struct CacheMetadata;
public:
/// Inserts the key; inserting a key which is already present has no effect (set semantics).
void add(const FileCacheKey & key);
/// Erases the key; throws LOGICAL_ERROR if the key is not in the queue.
void remove(const FileCacheKey & key);
/// Number of keys currently queued.
size_t getSize() const;
private:
/// Extracts an arbitrary key into `key`; returns false if the queue is empty.
bool tryPop(FileCacheKey & key);
std::unordered_set<FileCacheKey> keys;
mutable std::mutex mutex;
};
/// Remembers the cache base path, creates an empty cleanup queue
/// and obtains the "CacheMetadata" logger.
CacheMetadata::CacheMetadata(const std::string & path_)
: path(path_)
, cleanup_queue(std::make_unique<CleanupQueue>())
, log(&Poco::Logger::get("CacheMetadata"))
{
}
/// Returns the file name of a file segment: the decimal offset followed by a kind-specific
/// suffix ("_persistent", "_temporary", or nothing for regular segments).
String CacheMetadata::getFileNameForFileSegment(size_t offset, FileSegmentKind segment_kind)
{
    const String offset_str = std::to_string(offset);

    switch (segment_kind)
    {
        case FileSegmentKind::Persistent:
            return offset_str + "_persistent";
        case FileSegmentKind::Temporary:
            return offset_str + "_temporary";
        case FileSegmentKind::Regular:
            return offset_str + "";
    }

    /// Same as the fall-through of the switch: no suffix.
    return offset_str;
}
/// Returns the path of a file segment in the local cache:
/// <base path>/<first 3 characters of key>/<key>/<file segment file name>.
String CacheMetadata::getPathInLocalCache(const Key & key, size_t offset, FileSegmentKind segment_kind) const
{
    const auto key_str = key.toString();
    return fs::path(path) / key_str.substr(0, 3) / key_str / getFileNameForFileSegment(offset, segment_kind);
}
/// Returns the directory of a key in the local cache:
/// <base path>/<first 3 characters of key>/<key>.
String CacheMetadata::getPathInLocalCache(const Key & key) const
{
    const auto key_name = key.toString();
    const auto key_prefix = key_name.substr(0, 3);
    return fs::path(path) / key_prefix / key_name;
}
/// Finds the metadata of `key` and returns it locked.
/// If the key is absent from the map, or present but not ACTIVE, `key_not_found_policy` decides:
///   THROW        - throw LOGICAL_ERROR;
///   RETURN_NULL  - return nullptr;
///   CREATE_EMPTY - create new metadata for an absent key, re-activate a REMOVING key,
///                  and retry from the start for a REMOVED key.
/// `is_initial_load` is forwarded to the KeyMetadata constructor as `created_base_directory_`.
LockedKeyPtr CacheMetadata::lockKeyMetadata(
const FileCacheKey & key,
KeyNotFoundPolicy key_not_found_policy,
bool is_initial_load)
{
KeyMetadataPtr key_metadata;
{
/// The metadata map is accessed only under the metadata guard.
auto lock = guard.lock();
auto it = find(key);
if (it == end())
{
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
return nullptr;
it = emplace(
key, std::make_shared<KeyMetadata>(
key, getPathInLocalCache(key), *cleanup_queue, is_initial_load)).first;
}
key_metadata = it->second;
}
{
/// The key lock is taken after the metadata guard has been released,
/// so the key state has to be re-checked here.
auto locked_metadata = std::make_unique<LockedKey>(key_metadata);
const auto key_state = locked_metadata->getKeyState();
if (key_state == KeyMetadata::KeyState::ACTIVE)
return locked_metadata;
if (key_not_found_policy == KeyNotFoundPolicy::THROW)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key);
if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL)
return nullptr;
if (key_state == KeyMetadata::KeyState::REMOVING)
{
locked_metadata->removeFromCleanupQueue();
return locked_metadata;
}
chassert(key_state == KeyMetadata::KeyState::REMOVED);
chassert(key_not_found_policy == KeyNotFoundPolicy::CREATE_EMPTY);
}
/// Now we are at the case:
/// key_state == KeyMetadata::KeyState::REMOVED
/// and KeyNotFoundPolicy == CREATE_EMPTY
/// Retry.
/// NOTE(review): `is_initial_load` is not forwarded on retry, so the default (false) is used — confirm this is intended.
return lockKeyMetadata(key, key_not_found_policy);
}
/// Calls `func` for every ACTIVE key, each locked for the duration of the call.
/// Keys in REMOVING state are skipped; any other state is a LOGICAL_ERROR.
/// The metadata guard is held for the whole iteration.
void CacheMetadata::iterate(IterateCacheMetadataFunc && func)
{
    auto lock = guard.lock();

    for (const auto & [key, key_metadata] : *this)
    {
        auto locked_key = std::make_unique<LockedKey>(key_metadata);
        const auto state = locked_key->getKeyState();

        if (state == KeyMetadata::KeyState::REMOVING)
            continue;

        if (state != KeyMetadata::KeyState::ACTIVE)
            throw Exception(
                ErrorCodes::LOGICAL_ERROR, "Cannot lock key {}: key does not exist", key_metadata->key);

        func(*locked_key);
    }
}
/// Performs the delayed removal of keys submitted to the cleanup queue:
/// every queued key which is still present and not ACTIVE is marked as REMOVED,
/// erased from the metadata map, and its directory is removed from the filesystem
/// (together with the key prefix directory if that became empty).
/// The metadata guard is held for the whole cleanup.
void CacheMetadata::doCleanup()
{
    auto lock = guard.lock();

    /// Let's mention this case.
    /// This metadata cleanup is delayed, so what if we marked a key as deleted and
    /// put it into the deletion queue, but then the same key was added to the cache
    /// before we actually performed this delayed removal?
    /// In this case it will work fine because on each attempt to add any key to cache
    /// we perform this delayed removal.

    FileCacheKey cleanup_key;
    while (cleanup_queue->tryPop(cleanup_key))
    {
        auto it = find(cleanup_key);
        if (it == end())
            continue;

        auto locked_metadata = std::make_unique<LockedKey>(it->second);
        const auto key_state = locked_metadata->getKeyState();

        if (key_state == KeyMetadata::KeyState::ACTIVE)
        {
            /// Key was added back to cache after we submitted it to removal queue.
            continue;
        }

        locked_metadata->markAsRemoved();
        erase(it);

        try
        {
            const fs::path key_directory = getPathInLocalCache(cleanup_key);
            if (fs::exists(key_directory))
                fs::remove_all(key_directory);

            const fs::path key_prefix_directory = key_directory.parent_path();
            if (fs::exists(key_prefix_directory) && fs::is_empty(key_prefix_directory))
                fs::remove_all(key_prefix_directory);
        }
        catch (...)
        {
            /// Log first: the assertion aborts debug builds, and the cause must not be lost.
            tryLogCurrentException(__PRETTY_FUNCTION__);
            chassert(false);
        }
    }
}
/// Stores the key metadata and acquires its guard lock; the lock is held
/// for the whole lifetime of this object.
LockedKey::LockedKey(std::shared_ptr<KeyMetadata> key_metadata_)
: key_metadata(key_metadata_)
, lock(key_metadata->guard.lock())
, log(&Poco::Logger::get("LockedKey"))
{
}
/// If the key is left without any file segments, submits it for delayed removal:
/// marks it as REMOVING and adds it to the cleanup queue.
LockedKey::~LockedKey()
{
    /// Only an ACTIVE key may be submitted for removal.
    /// A REMOVING key is already submitted. A REMOVED key (see markAsRemoved()) must stay
    /// REMOVED: switching it back to REMOVING would let a concurrent lockKeyMetadata(),
    /// which still holds a pointer to this metadata, re-activate a key that was already
    /// erased from the metadata map.
    if (!key_metadata->empty() || key_metadata->key_state != KeyMetadata::KeyState::ACTIVE)
        return;

    key_metadata->key_state = KeyMetadata::KeyState::REMOVING;
    key_metadata->cleanup_queue.add(getKey());
}
/// Cancels the pending removal of the key by switching its state from REMOVING back to ACTIVE.
/// Throws LOGICAL_ERROR if the key is not in REMOVING state.
/// Note that the key itself is not erased from the cleanup queue here.
void LockedKey::removeFromCleanupQueue()
{
if (key_metadata->key_state != KeyMetadata::KeyState::REMOVING)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove non-removing");
/// Just mark key_state as "not to be removed", the cleanup thread will check it and skip the key.
key_metadata->key_state = KeyMetadata::KeyState::ACTIVE;
}
/// Sets the key state to REMOVED.
void LockedKey::markAsRemoved()
{
key_metadata->key_state = KeyMetadata::KeyState::REMOVED;
}
/// Returns true if the file segment at `offset` has exactly two shared owners.
/// Throws LOGICAL_ERROR (from getByOffset) if there is no such offset.
/// NOTE(review): presumably the two owners are the metadata entry and the caller's holder — confirm.
bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const
{
const auto file_segment_metadata = getByOffset(offset);
return file_segment_metadata->file_segment.use_count() == 2;
}
/// Removes every file segment of this key which is currently releasable
/// (see FileSegmentMetadata::releasable()); other segments are left untouched.
void LockedKey::removeAllReleasable()
{
    auto it = key_metadata->begin();
    while (it != key_metadata->end())
    {
        if (it->second->releasable())
        {
            /// Keep the segment alive while it is being removed; continue from the element after it.
            auto file_segment = it->second->file_segment;
            it = removeFileSegment(file_segment->offset(), file_segment->lock());
        }
        else
        {
            ++it;
        }
    }
}
/// Removes the file segment at `offset` from this key and returns the iterator
/// to the next metadata entry. Throws LOGICAL_ERROR if there is no such offset.
/// Steps: annul the segment's queue entry (if any), delete its file from disk (if it exists),
/// detach the segment, erase its metadata entry.
KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock)
{
LOG_DEBUG(log, "Remove from cache. Key: {}, offset: {}", getKey(), offset);
auto it = key_metadata->find(offset);
if (it == key_metadata->end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset);
/// Hold an own reference: the metadata entry is erased at the end.
auto file_segment = it->second->file_segment;
if (file_segment->queue_iterator)
file_segment->queue_iterator->annul();
const auto path = key_metadata->getFileSegmentPath(*file_segment);
if (fs::exists(path))
fs::remove(path);
file_segment->detach(segment_lock, *this);
return key_metadata->erase(it);
}
/// Replaces the file segment at `offset` with a new DOWNLOADED segment whose size equals
/// the downloaded size, and reduces the size of its queue entry by the difference between
/// the old reserved size and the downloaded size.
/// Throws LOGICAL_ERROR if there is no such offset or if the segment is already fully downloaded.
void LockedKey::shrinkFileSegmentToDownloadedSize(
size_t offset,
const FileSegmentGuard::Lock & segment_lock)
{
/**
* In case the file was partially downloaded and its download cannot be continued
* because there is no space left in the cache, we need to be able to cut the file segment's size to downloaded_size.
*/
auto metadata = getByOffset(offset);
/// `file_segment` is a reference to the member of `metadata`, not a copy:
/// after the assignment below it refers to the newly created segment.
const auto & file_segment = metadata->file_segment;
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
const size_t downloaded_size = file_segment->getDownloadedSize(false);
if (downloaded_size == file_segment->range().size())
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Nothing to reduce, file segment fully downloaded: {}",
file_segment->getInfoForLogUnlocked(segment_lock));
}
int64_t diff = file_segment->reserved_size - downloaded_size;
metadata->file_segment = std::make_shared<FileSegment>(
getKey(), offset, downloaded_size, FileSegment::State::DOWNLOADED,
CreateFileSegmentSettings(file_segment->getKind()),
file_segment->cache, key_metadata, file_segment->queue_iterator);
if (diff)
metadata->getQueueIterator()->updateSize(-diff);
/// NOTE(review): this checks the new segment with the lock passed in by the caller,
/// which presumably belongs to the replaced segment — confirm this is intended.
chassert(file_segment->assertCorrectnessUnlocked(segment_lock));
}
/// Returns the (read-only) metadata of the file segment at `offset`.
/// Throws LOGICAL_ERROR if this key has no such offset.
std::shared_ptr<const FileSegmentMetadata> LockedKey::getByOffset(size_t offset) const
{
    auto it = key_metadata->find(offset);
    if (it == key_metadata->end())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset);
    return it->second;
}
/// Returns the metadata of the file segment at `offset`.
/// Throws LOGICAL_ERROR if this key has no such offset.
std::shared_ptr<FileSegmentMetadata> LockedKey::getByOffset(size_t offset)
{
    auto it = key_metadata->find(offset);
    if (it == key_metadata->end())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset);
    return it->second;
}
/// Returns the (read-only) metadata of the file segment at `offset`,
/// or nullptr if this key has no such offset.
std::shared_ptr<const FileSegmentMetadata> LockedKey::tryGetByOffset(size_t offset) const
{
    const auto found = key_metadata->find(offset);
    return found == key_metadata->end() ? nullptr : found->second;
}
/// Returns the metadata of the file segment at `offset`,
/// or nullptr if this key has no such offset.
std::shared_ptr<FileSegmentMetadata> LockedKey::tryGetByOffset(size_t offset)
{
    const auto found = key_metadata->find(offset);
    return found == key_metadata->end() ? nullptr : found->second;
}
/// Returns the offsets of all file segments of this key as a ", "-separated list,
/// in the iteration order of the metadata map.
std::string LockedKey::toString() const
{
    std::string offsets;
    bool is_first = true;
    for (const auto & offset_and_metadata : *key_metadata)
    {
        if (!is_first)
            offsets += ", ";
        is_first = false;
        offsets += std::to_string(offset_and_metadata.first);
    }
    return offsets;
}
/// Inserts the key into the queue under the mutex. The queue is a set,
/// so inserting a key which is already present has no effect.
void CleanupQueue::add(const FileCacheKey & key)
{
std::lock_guard lock(mutex);
keys.insert(key);
}
/// Erases the key from the queue under the mutex.
/// Throws LOGICAL_ERROR if the key was not queued.
void CleanupQueue::remove(const FileCacheKey & key)
{
    std::lock_guard lock(mutex);

    const size_t erased_count = keys.erase(key);
    if (erased_count == 0)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key {} in removal queue", key);
}
/// Takes an arbitrary key out of the queue and stores it into `key`.
/// Returns false (leaving `key` untouched) if the queue is empty.
bool CleanupQueue::tryPop(FileCacheKey & key)
{
    std::lock_guard lock(mutex);

    if (const auto first = keys.begin(); first != keys.end())
    {
        key = *first;
        keys.erase(first);
        return true;
    }

    return false;
}
/// Returns the number of queued keys (read under the mutex).
size_t CleanupQueue::getSize() const
{
std::lock_guard lock(mutex);
return keys.size();
}
}

View File

@ -0,0 +1,176 @@
#pragma once
#include <boost/noncopyable.hpp>
#include <Interpreters/Cache/Guards.h>
#include <Interpreters/Cache/IFileCachePriority.h>
#include <Interpreters/Cache/FileCacheKey.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Interpreters/Cache/FileCache_fwd_internal.h>
namespace DB
{
class CleanupQueue;
using CleanupQueuePtr = std::shared_ptr<CleanupQueue>;
/// Metadata of a single file segment: owns a reference to the segment and
/// carries a flag marking it as a removal candidate.
struct FileSegmentMetadata : private boost::noncopyable
{
    using Priority = IFileCachePriority;

    /// Validates the state of the segment, see the definition.
    explicit FileSegmentMetadata(FileSegmentPtr && file_segment_);

    /// True if this metadata is the only owner of the file segment.
    /// `use_count() == 1` is what the C++17-deprecated / C++20-removed `shared_ptr::unique()` computed.
    bool releasable() const { return file_segment.use_count() == 1; }

    /// Reserved size of the file segment.
    size_t size() const;

    /// False once the segment has been marked as a removal candidate.
    bool valid() const { return !removal_candidate.load(); }

    Priority::Iterator getQueueIterator() { return file_segment->getQueueIterator(); }

    FileSegmentPtr file_segment;
    std::atomic<bool> removal_candidate{false};
};
using FileSegmentMetadataPtr = std::shared_ptr<FileSegmentMetadata>;
/// Metadata of a single cache key: an ordered map from file segment offset to
/// file segment metadata, plus the key state and the guard which protects them.
/// Locked access goes through `LockedKey` (a friend), obtained via `lock()` / `tryLock()`.
struct KeyMetadata : public std::map<size_t, FileSegmentMetadataPtr>,
private boost::noncopyable,
public std::enable_shared_from_this<KeyMetadata>
{
friend struct LockedKey;
using Key = FileCacheKey;
/// `created_base_directory_` tells whether the key directory is known to exist already.
KeyMetadata(
const Key & key_,
const std::string & key_path_,
CleanupQueue & cleanup_queue_,
bool created_base_directory_ = false);
enum class KeyState
{
/// The key is usable.
ACTIVE,
/// The key was submitted to the cleanup queue; it can still be switched back to ACTIVE.
REMOVING,
/// The key was removed from the cache metadata.
REMOVED,
};
const Key key;
const std::string key_path;
/// Throws LOGICAL_ERROR if the key has non-ACTIVE state.
LockedKeyPtr lock();
/// Return nullptr if key has non-ACTIVE state.
LockedKeyPtr tryLock();
/// Creates the key directory if it was not created yet; returns false on failure.
bool createBaseDirectory();
/// Path of the given file segment inside the key directory.
std::string getFileSegmentPath(const FileSegment & file_segment);
private:
KeyState key_state = KeyState::ACTIVE;
KeyGuard guard;
CleanupQueue & cleanup_queue;
std::atomic<bool> created_base_directory = false;
};
using KeyMetadataPtr = std::shared_ptr<KeyMetadata>;
/// Metadata of the whole cache: a hash map from cache key to key metadata,
/// the cache base path, and the queue of keys submitted for delayed removal.
struct CacheMetadata : public std::unordered_map<FileCacheKey, KeyMetadataPtr>, private boost::noncopyable
{
public:
using Key = FileCacheKey;
using IterateCacheMetadataFunc = std::function<void(const LockedKey &)>;
explicit CacheMetadata(const std::string & path_);
const String & getBaseDirectory() const { return path; }
/// Path of a file segment: <base path>/<first 3 characters of key>/<key>/<file name>.
String getPathInLocalCache(
const Key & key,
size_t offset,
FileSegmentKind segment_kind) const;
/// Path of a key directory: <base path>/<first 3 characters of key>/<key>.
String getPathInLocalCache(const Key & key) const;
/// File name of a file segment: the offset plus a suffix which depends on the segment kind.
static String getFileNameForFileSegment(size_t offset, FileSegmentKind segment_kind);
/// Calls `func` for every ACTIVE key, each locked for the duration of the call.
void iterate(IterateCacheMetadataFunc && func);
/// What lockKeyMetadata() does if the key is absent or not ACTIVE.
enum class KeyNotFoundPolicy
{
THROW,
CREATE_EMPTY,
RETURN_NULL,
};
/// Returns the locked key metadata, handling a missing key according to `key_not_found_policy`.
LockedKeyPtr lockKeyMetadata(
const Key & key,
KeyNotFoundPolicy key_not_found_policy,
bool is_initial_load = false);
/// Performs the delayed removal of keys from the cleanup queue.
void doCleanup();
private:
const std::string path; /// Cache base path
CacheMetadataGuard guard;
const CleanupQueuePtr cleanup_queue;
Poco::Logger * log;
};
/**
* `LockedKey` is an object which makes sure that as long as it exists the following is true:
* 1. the key cannot be removed from cache
* (Why: this LockedKey locks key metadata mutex in ctor, unlocks it in dtor, and so
* when key is going to be deleted, key mutex is also locked.
* Why can it not be the other way round? E.g. that ctor of LockedKey locks the key
* right after it was deleted? This case is taken into consideration in createLockedKey())
* 2. the key cannot be modified, e.g. new offsets cannot be added to key; already existing
* offsets cannot be deleted from the key
* It also provides some methods which allow the owner of this LockedKey object to do such
* modification of the key (adding/deleting offsets) and deleting the key from cache.
*/
struct LockedKey : private boost::noncopyable
{
using Key = FileCacheKey;
/// Acquires the key guard lock.
explicit LockedKey(std::shared_ptr<KeyMetadata> key_metadata_);
/// Submits the key to the cleanup queue if it has no file segments left, see the definition.
~LockedKey();
const Key & getKey() const { return key_metadata->key; }
auto begin() const { return key_metadata->begin(); }
auto end() const { return key_metadata->end(); }
/// getByOffset throws LOGICAL_ERROR if there is no such offset, tryGetByOffset returns nullptr.
std::shared_ptr<const FileSegmentMetadata> getByOffset(size_t offset) const;
std::shared_ptr<FileSegmentMetadata> getByOffset(size_t offset);
std::shared_ptr<const FileSegmentMetadata> tryGetByOffset(size_t offset) const;
std::shared_ptr<FileSegmentMetadata> tryGetByOffset(size_t offset);
KeyMetadata::KeyState getKeyState() const { return key_metadata->key_state; }
std::shared_ptr<const KeyMetadata> getKeyMetadata() const { return key_metadata; }
std::shared_ptr<KeyMetadata> getKeyMetadata() { return key_metadata; }
/// Removes all file segments of the key which are releasable.
void removeAllReleasable();
/// Removes the file segment at `offset` and returns the iterator to the next entry.
KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &);
/// Cuts the file segment at `offset` to its downloaded size.
void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &);
bool isLastOwnerOfFileSegment(size_t offset) const;
/// Switches the key state from REMOVING back to ACTIVE.
void removeFromCleanupQueue();
/// Switches the key state to REMOVED.
void markAsRemoved();
/// Comma-separated list of offsets of the key.
std::string toString() const;
private:
const std::shared_ptr<KeyMetadata> key_metadata;
KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`.
Poco::Logger * log;
};
}

View File

@ -0,0 +1,112 @@
#include <Interpreters/Cache/QueryLimit.h>
#include <Interpreters/Cache/Metadata.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// True if the current thread is initialized, has a query context
/// and a non-empty query id.
static bool isQueryInitialized()
{
    if (!CurrentThread::isInitialized())
        return false;

    if (!CurrentThread::get().getQueryContext())
        return false;

    return !CurrentThread::getQueryId().empty();
}
/// Returns the query context registered for the query of the current thread,
/// or nullptr if there is no initialized query or no context was registered for it.
FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(const CacheGuard::Lock &)
{
    if (!isQueryInitialized())
        return nullptr;

    const auto found = query_map.find(std::string(CurrentThread::getQueryId()));
    if (found == query_map.end())
        return nullptr;

    return found->second;
}
/// Unregisters the query context of `query_id`.
/// Throws LOGICAL_ERROR if no context is registered for this query id.
void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const CacheGuard::Lock &)
{
    const auto found = query_map.find(query_id);
    if (found != query_map.end())
    {
        query_map.erase(found);
        return;
    }

    throw Exception(
        ErrorCodes::LOGICAL_ERROR,
        "Attempt to release query context that does not exist (query_id: {})",
        query_id);
}
/// Returns the query context registered for `query_id`, creating it from `settings`
/// on first use. Returns nullptr for an empty query id.
FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::getOrSetQueryContext(
    const std::string & query_id,
    const ReadSettings & settings,
    const CacheGuard::Lock &)
{
    if (query_id.empty())
        return nullptr;

    /// A new slot is value-initialized, i.e. holds a null pointer until it is filled below.
    auto [slot, is_new] = query_map.try_emplace(query_id);
    if (is_new)
    {
        slot->second = std::make_shared<QueryContext>(
            settings.filesystem_cache_max_download_size,
            !settings.skip_download_if_exceeds_query_cache);
    }

    return slot->second;
}
/// Creates a per-query priority queue limited to `query_cache_size` bytes
/// (the elements limit is passed as 0) and remembers the recache flag.
FileCacheQueryLimit::QueryContext::QueryContext(
    size_t query_cache_size,
    bool recache_on_query_limit_exceeded_)
    : priority(query_cache_size, 0)
    , recache_on_query_limit_exceeded(recache_on_query_limit_exceeded_)
{
}
/// Registers the file segment in the query context: adds an entry for it to the
/// per-query priority queue and remembers the queue iterator under (key, offset).
/// Throws LOGICAL_ERROR if this (key, offset) is already registered.
void FileCacheQueryLimit::QueryContext::add(
    const FileSegment & file_segment,
    const CacheGuard::Lock & lock)
{
    const auto key = file_segment.key();
    const auto offset = file_segment.offset();

    /// Detect a duplicate before touching the priority queue, so that a failed call
    /// does not leave a stale queue entry (and inflated size accounting) behind.
    if (records.find({key, offset}) != records.end())
    {
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Cannot add offset {} to query context under key {}, it already exists",
            offset, key);
    }

    auto it = getPriority().add(
        file_segment.getKeyMetadata(), offset, file_segment.range().size(), lock);

    records.emplace(FileCacheKeyAndOffset{key, offset}, it);
}
/// Unregisters (key, offset) from the query context: removes its entry from the
/// per-query priority queue and forgets the record.
/// Throws LOGICAL_ERROR if this (key, offset) is not registered.
void FileCacheQueryLimit::QueryContext::remove(
    const Key & key,
    size_t offset,
    const CacheGuard::Lock & lock)
{
    const auto found = records.find({key, offset});
    if (found == records.end())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no {}:{} in query context", key, offset);

    found->second->remove(lock);
    records.erase(found);
}
/// Returns the priority queue iterator registered for (key, offset),
/// or nullptr if there is no such record.
IFileCachePriority::Iterator FileCacheQueryLimit::QueryContext::tryGet(
    const Key & key,
    size_t offset,
    const CacheGuard::Lock &)
{
    const auto found = records.find({key, offset});
    return found == records.end() ? nullptr : found->second;
}
}

View File

@ -0,0 +1,67 @@
#pragma once
#include <Interpreters/Cache/Guards.h>
#include <Interpreters/Cache/LRUFileCachePriority.h>
namespace DB
{
struct ReadSettings;
class FileSegment;
/// Registry of per-query cache contexts, keyed by query id.
/// Every method takes a `CacheGuard::Lock` reference as an argument.
class FileCacheQueryLimit
{
public:
class QueryContext;
using QueryContextPtr = std::shared_ptr<QueryContext>;
/// Context of the query running in the current thread, or nullptr if there is none.
QueryContextPtr tryGetQueryContext(const CacheGuard::Lock & lock);
/// Context of `query_id`, created on first use; nullptr for an empty query id.
QueryContextPtr getOrSetQueryContext(
const std::string & query_id,
const ReadSettings & settings,
const CacheGuard::Lock &);
/// Throws LOGICAL_ERROR if there is no context for `query_id`.
void removeQueryContext(const std::string & query_id, const CacheGuard::Lock &);
/// Per-query state: an own priority queue and a (key, offset) -> queue iterator index.
class QueryContext
{
public:
using Key = FileCacheKey;
using Priority = IFileCachePriority;
using PriorityIterator = IFileCachePriority::Iterator;
QueryContext(size_t query_cache_size, bool recache_on_query_limit_exceeded_);
Priority & getPriority() { return priority; }
const Priority & getPriority() const { return priority; }
bool recacheOnFileCacheQueryLimitExceeded() const { return recache_on_query_limit_exceeded; }
/// Queue iterator registered for (key, offset), or nullptr.
IFileCachePriority::Iterator tryGet(
const Key & key,
size_t offset,
const CacheGuard::Lock &);
/// Registers the file segment; throws LOGICAL_ERROR if it is already registered.
void add(
const FileSegment & file_segment,
const CacheGuard::Lock &);
/// Unregisters (key, offset); throws LOGICAL_ERROR if it is not registered.
void remove(
const Key & key,
size_t offset,
const CacheGuard::Lock &);
private:
using Records = std::unordered_map<FileCacheKeyAndOffset, IFileCachePriority::Iterator, FileCacheKeyAndOffsetHash>;
Records records;
LRUFileCachePriority priority;
const bool recache_on_query_limit_exceeded;
};
private:
using QueryContextMap = std::unordered_map<String, QueryContextPtr>;
QueryContextMap query_map;
};
using FileCacheQueryLimitPtr = std::unique_ptr<FileCacheQueryLimit>;
}

View File

@ -17,17 +17,17 @@ namespace ErrorCodes
}
WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_)
: WriteBufferFromFileDecorator(file_segment_->detachWriter())
: WriteBufferFromFileDecorator(std::make_unique<WriteBufferFromFile>(file_segment_->getPathInLocalCache()))
, file_segment(file_segment_)
{
}
WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolder && segment_holder_)
WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder_)
: WriteBufferFromFileDecorator(
segment_holder_.file_segments.size() == 1
? segment_holder_.file_segments.front()->detachWriter()
segment_holder_->size() == 1
? std::make_unique<WriteBufferFromFile>(segment_holder_->front().getPathInLocalCache())
: throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment"))
, file_segment(segment_holder_.file_segments.front().get())
, file_segment(&segment_holder_->front())
, segment_holder(std::move(segment_holder_))
{
}

View File

@ -13,7 +13,7 @@ class WriteBufferToFileSegment : public WriteBufferFromFileDecorator, public IRe
{
public:
explicit WriteBufferToFileSegment(FileSegment * file_segment_);
explicit WriteBufferToFileSegment(FileSegmentsHolder && segment_holder);
explicit WriteBufferToFileSegment(FileSegmentsHolderPtr segment_holder);
void nextImpl() override;
@ -28,7 +28,7 @@ private:
FileSegment * file_segment;
/// Empty if file_segment is not owned by this WriteBufferToFileSegment
FileSegmentsHolder segment_holder;
FileSegmentsHolderPtr segment_holder;
};

View File

@ -19,7 +19,6 @@
#include <Coordination/KeeperDispatcher.h>
#include <Compression/ICompressionCodec.h>
#include <Core/BackgroundSchedulePool.h>
#include <Core/ServerSettings.h>
#include <Formats/FormatFactory.h>
#include <Databases/IDatabase.h>
#include <Storages/IStorage.h>
@ -43,6 +42,9 @@
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Interpreters/Cache/QueryCache.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Interpreters/Cache/FileCache.h>
#include <Core/ServerSettings.h>
#include <Interpreters/PreparedSets.h>
#include <Core/Settings.h>
#include <Core/SettingsQuirks.h>
@ -107,15 +109,12 @@
#include <Interpreters/Lemmatizers.h>
#include <Interpreters/ClusterDiscovery.h>
#include <Interpreters/TransactionLog.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <filesystem>
#include <re2/re2.h>
#include <Storages/StorageView.h>
#include <Parsers/ASTFunction.h>
#include <base/find_symbols.h>
#include <Interpreters/Cache/FileCache.h>
#if USE_ROCKSDB
#include <rocksdb/table.h>
#endif
@ -536,6 +535,12 @@ struct ContextSharedPart : boost::noncopyable
/// take it as well, which will cause deadlock.
delete_ddl_worker.reset();
/// Background operations in cache use background schedule pool.
/// Deactivate them before destructing it.
const auto & caches = FileCacheFactory::instance().getAll();
for (const auto & [_, cache] : caches)
cache->cache->deactivateBackgroundOperations();
{
auto lock = std::lock_guard(mutex);

View File

@ -20,7 +20,7 @@ static Block getSampleBlock()
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "max_elements"},
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "max_file_segment_size"},
ColumnWithTypeAndName{std::make_shared<DataTypeNumber<UInt8>>(), "cache_on_write_operations"},
ColumnWithTypeAndName{std::make_shared<DataTypeNumber<UInt8>>(), "enable_cache_hits_threshold"},
ColumnWithTypeAndName{std::make_shared<DataTypeNumber<UInt8>>(), "cache_hits_threshold"},
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "current_size"},
ColumnWithTypeAndName{std::make_shared<DataTypeUInt64>(), "current_elements"},
ColumnWithTypeAndName{std::make_shared<DataTypeString>(), "path"},
@ -45,7 +45,7 @@ BlockIO InterpreterDescribeCacheQuery::execute()
res_columns[1]->insert(settings.max_elements);
res_columns[2]->insert(settings.max_file_segment_size);
res_columns[3]->insert(settings.cache_on_write_operations);
res_columns[4]->insert(settings.enable_cache_hits_threshold);
res_columns[4]->insert(settings.cache_hits_threshold);
res_columns[5]->insert(cache->getUsedCacheSize());
res_columns[6]->insert(cache->getFileSegmentsNum());
res_columns[7]->insert(cache->getBasePath());

View File

@ -364,12 +364,12 @@ BlockIO InterpreterSystemQuery::execute()
{
auto caches = FileCacheFactory::instance().getAll();
for (const auto & [_, cache_data] : caches)
cache_data->cache->removeIfReleasable();
cache_data->cache->removeAllReleasable();
}
else
{
auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache;
cache->removeIfReleasable();
cache->removeAllReleasable();
}
break;
}

View File

@ -92,12 +92,15 @@ TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, si
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache and no volume");
}
FileSegmentsHolder TemporaryDataOnDisk::createCacheFile(size_t max_file_size)
FileSegmentsHolderPtr TemporaryDataOnDisk::createCacheFile(size_t max_file_size)
{
if (!file_cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache");
return file_cache->set(FileSegment::Key::random(), 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true));
const auto key = FileSegment::Key::random();
auto holder = file_cache->set(key, 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true));
fs::create_directories(file_cache->getPathInLocalCache(key));
return holder;
}
TemporaryFileOnDiskHolder TemporaryDataOnDisk::createRegularFile(size_t max_file_size)
@ -237,15 +240,14 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const
LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getPath());
}
TemporaryFileStream::TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_)
TemporaryFileStream::TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_)
: parent(parent_)
, header(header_)
, segment_holder(std::move(segments_))
{
if (segment_holder.file_segments.size() != 1)
if (segment_holder->size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream can be created only from single segment");
auto & segment = segment_holder.file_segments.front();
auto out_buf = std::make_unique<WriteBufferToFileSegment>(segment.get());
auto out_buf = std::make_unique<WriteBufferToFileSegment>(&segment_holder->front());
LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", out_buf->getFileName());
out_writer = std::make_unique<OutputWriter>(std::move(out_buf), header);
@ -336,7 +338,7 @@ void TemporaryFileStream::updateAllocAndCheck()
bool TemporaryFileStream::isEof() const
{
return file == nullptr && segment_holder.empty();
return file == nullptr && !segment_holder;
}
void TemporaryFileStream::release()
@ -356,7 +358,7 @@ void TemporaryFileStream::release()
parent->deltaAllocAndCheck(-stat.compressed_size, -stat.uncompressed_size);
}
if (!segment_holder.empty())
if (segment_holder)
segment_holder.reset();
}
@ -364,8 +366,8 @@ String TemporaryFileStream::getPath() const
{
if (file)
return file->getPath();
if (!segment_holder.file_segments.empty())
return segment_holder.file_segments.front()->getPathInLocalCache();
if (segment_holder && !segment_holder->empty())
return segment_holder->front().getPathInLocalCache();
throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryFileStream has no file");
}

View File

@ -103,7 +103,7 @@ public:
const StatAtomic & getStat() const { return stat; }
private:
FileSegmentsHolder createCacheFile(size_t max_file_size);
FileSegmentsHolderPtr createCacheFile(size_t max_file_size);
TemporaryFileOnDiskHolder createRegularFile(size_t max_file_size);
mutable std::mutex mutex;
@ -130,7 +130,7 @@ public:
};
TemporaryFileStream(TemporaryFileOnDiskHolder file_, const Block & header_, TemporaryDataOnDisk * parent_);
TemporaryFileStream(FileSegmentsHolder && segments_, const Block & header_, TemporaryDataOnDisk * parent_);
TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_);
size_t write(const Block & block);
void flush();
@ -161,7 +161,7 @@ private:
/// Data can be stored in file directly or in the cache
TemporaryFileOnDiskHolder file;
FileSegmentsHolder segment_holder;
FileSegmentsHolderPtr segment_holder;
Stat stat;

View File

@ -16,6 +16,9 @@
#include <filesystem>
#include <thread>
#include <DataTypes/DataTypesNumber.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h>
#include <base/sleep.h>
#include <Poco/ConsoleChannel.h>
#include <Disks/IO/CachedOnDiskWriteBufferFromFile.h>
@ -26,21 +29,6 @@ using namespace DB;
static constexpr auto TEST_LOG_LEVEL = "debug";
void assertRange(
[[maybe_unused]] size_t assert_n, DB::FileSegmentPtr file_segment,
const DB::FileSegment::Range & expected_range, DB::FileSegment::State expected_state)
{
auto range = file_segment->range();
std::cerr << fmt::format("\nAssert #{} : {} == {} (state: {} == {})\n", assert_n,
range.toString(), expected_range.toString(),
toString(file_segment->state()), toString(expected_state));
ASSERT_EQ(range.left, expected_range.left);
ASSERT_EQ(range.right, expected_range.right);
ASSERT_EQ(file_segment->state(), expected_state);
}
void printRanges(const auto & segments)
{
std::cerr << "\nHaving file segments: ";
@ -48,21 +36,16 @@ void printRanges(const auto & segments)
std::cerr << '\n' << segment->range().toString() << " (state: " + DB::FileSegment::stateToString(segment->state()) + ")" << "\n";
}
std::vector<DB::FileSegmentPtr> fromHolder(const DB::FileSegmentsHolder & holder)
{
return std::vector<DB::FileSegmentPtr>(holder.file_segments.begin(), holder.file_segments.end());
}
/// Builds the path of a cached file segment:
/// <base_path>/<first three characters of the key string>/<key string>/<offset>.
String getFileSegmentPath(const String & base_path, const DB::FileCache::Key & key, size_t offset)
{
    const auto key_name = key.toString();
    const auto key_prefix = key_name.substr(0, 3);
    const auto offset_name = DB::toString(offset);
    return fs::path(base_path) / key_prefix / key_name / offset_name;
}
void download(const std::string & cache_base_path, DB::FileSegmentPtr file_segment)
void download(const std::string & cache_base_path, DB::FileSegment & file_segment)
{
const auto & key = file_segment->key();
size_t size = file_segment->range().size();
const auto & key = file_segment.key();
size_t size = file_segment.range().size();
auto key_str = key.toString();
auto subdir = fs::path(cache_base_path) / key_str.substr(0, 3) / key_str;
@ -70,29 +53,94 @@ void download(const std::string & cache_base_path, DB::FileSegmentPtr file_segme
fs::create_directories(subdir);
std::string data(size, '0');
file_segment->write(data.data(), size, file_segment->getCurrentWriteOffset());
file_segment.write(data.data(), size, file_segment.getCurrentWriteOffset(false));
}
void prepareAndDownload(const std::string & cache_base_path, DB::FileSegmentPtr file_segment)
{
ASSERT_TRUE(file_segment->reserve(file_segment->range().size()));
download(cache_base_path, file_segment);
}
using Range = FileSegment::Range;
using Ranges = std::vector<Range>;
using State = FileSegment::State;
using States = std::vector<State>;
using Holder = FileSegmentsHolder;
using HolderPtr = FileSegmentsHolderPtr;
void complete(const std::string & cache_base_path, const DB::FileSegmentsHolder & holder)
fs::path caches_dir = fs::current_path() / "lru_cache_test";
std::string cache_base_path = caches_dir / "cache1" / "";
void assertEqual(const HolderPtr & holder, const Ranges & expected_ranges, const States & expected_states = {})
{
for (const auto & file_segment : holder.file_segments)
std::cerr << "Holder: " << holder->toString() << "\n";
ASSERT_EQ(holder->size(), expected_ranges.size());
if (!expected_states.empty())
ASSERT_EQ(holder->size(), expected_states.size());
auto get_expected_state = [&](size_t i)
{
ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, file_segment);
file_segment->completeWithoutState();
if (expected_states.empty())
return State::DOWNLOADED;
else
return expected_states[i];
};
size_t i = 0;
for (const auto & file_segment : *holder)
{
ASSERT_EQ(file_segment->range(), expected_ranges[i]);
ASSERT_EQ(file_segment->state(), get_expected_state(i));
++i;
}
}
/// Returns the file segment stored at position `i` (0-based) of `holder`.
/// An out-of-range index aborts the test binary via std::terminate().
FileSegment & get(const HolderPtr & holder, int i)
{
    /// Validate the index before advancing the iterator: std::next() beyond end() is
    /// undefined behaviour, so comparing the advanced iterator with end() afterwards
    /// would only detect i == size() and nothing larger (or negative).
    if (i < 0 || static_cast<size_t>(i) >= holder->size())
        std::terminate();

    return **std::next(holder->begin(), i);
}
/// Test helper: downloads one file segment completely and checks every state transition.
/// The caller must be able to become the downloader and nothing may be downloaded yet.
/// NOTE(review): ASSERT_* are fatal gtest assertions, so a failure returns from this helper only,
/// not from the calling test — confirm callers do not rely on the test being aborted.
void download(FileSegment & file_segment)
{
std::cerr << "Downloading range " << file_segment.range().toString() << "\n";
/// Become the downloader; the segment must then be in DOWNLOADING state with no data written.
ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId());
ASSERT_EQ(file_segment.state(), State::DOWNLOADING);
ASSERT_EQ(file_segment.getDownloadedSize(false), 0);
/// Space for the whole range has to be reserved before writing.
ASSERT_TRUE(file_segment.reserve(file_segment.range().size()));
/// Write range().size() bytes into the segment (see download(cache_base_path, file_segment)).
download(cache_base_path, file_segment);
/// Writing alone does not finish the segment: it stays DOWNLOADING until complete() is called.
ASSERT_EQ(file_segment.state(), State::DOWNLOADING);
file_segment.complete();
ASSERT_EQ(file_segment.state(), State::DOWNLOADED);
}
/// Test helper: checks that space for `file_segment` cannot be reserved.
/// The caller becomes the downloader, the reservation of the whole range is expected to fail,
/// and the segment is then completed without any data written.
void assertDownloadFails(FileSegment & file_segment)
{
ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId());
ASSERT_EQ(file_segment.getDownloadedSize(false), 0);
/// The reservation must be rejected (the tests use this when the cache cannot take more data).
ASSERT_FALSE(file_segment.reserve(file_segment.range().size()));
/// complete() is still called on the empty segment; the tests in this file expect
/// the segment to be reported as DETACHED afterwards.
file_segment.complete();
}
/// Test helper: downloads every file segment of `holder`, one by one in iteration order,
/// using download(FileSegment &).
void download(const HolderPtr & holder)
{
    for (auto & segment_ptr : *holder)
        download(*segment_ptr);
}
/// Test helper: calls use() on every file segment of `holder`.
/// NOTE(review): presumably use() marks the segment as recently used in the cache priority
/// queue (the tests check dumpQueue() order after calling this) — confirm against FileSegment::use().
void increasePriority(const HolderPtr & holder)
{
    for (auto & segment_ptr : *holder)
    {
        segment_ptr->use();
    }
}
class FileCacheTest : public ::testing::Test
{
public:
static void setupLogs(const std::string & level)
{
Poco::AutoPtr<Poco::ConsoleChannel> channel(new Poco::ConsoleChannel(std::cerr));
@ -118,8 +166,6 @@ public:
fs::remove_all(cache_base_path);
}
fs::path caches_dir = fs::current_path() / "lru_cache_test";
std::string cache_base_path = caches_dir / "cache1" / "";
};
TEST_F(FileCacheTest, get)
@ -128,6 +174,14 @@ TEST_F(FileCacheTest, get)
/// To work with cache need query_id and query context.
std::string query_id = "query_id";
Poco::XML::DOMParser dom_parser;
std::string xml(R"CONFIG(<clickhouse>
</clickhouse>)CONFIG");
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
getMutableContext().context->setConfig(config);
auto query_context = DB::Context::createCopy(getContext().context);
query_context->makeQueryContext();
query_context->setCurrentQueryId(query_id);
@ -140,255 +194,249 @@ TEST_F(FileCacheTest, get)
settings.max_elements = 5;
{
std::cerr << "Step 1\n";
auto cache = DB::FileCache(settings);
cache.initialize();
auto key = cache.hash("key1");
auto key = cache.createKeyForPath("key1");
{
auto holder = cache.getOrSet(key, 0, 10, {}); /// Add range [0, 9]
auto segments = fromHolder(holder);
/// Range was not present in cache. It should be added in cache as one while file segment.
ASSERT_EQ(segments.size(), 1);
assertRange(1, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::EMPTY);
/// Exception because space not reserved.
/// EXPECT_THROW(download(segments[0]), DB::Exception);
/// Exception because space can be reserved only by downloader
/// EXPECT_THROW(segments[0]->reserve(segments[0]->range().size()), DB::Exception);
ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[0]->reserve(segments[0]->range().size()));
assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING);
download(cache_base_path, segments[0]);
segments[0]->completeWithoutState();
assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
assertEqual(holder, { Range(0, 9) }, { State::EMPTY });
download(holder->front());
assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
increasePriority(holder);
}
/// Current cache: [__________]
/// ^ ^
/// 0 9
assertEqual(cache.getSnapshot(key), { Range(0, 9) });
assertEqual(cache.dumpQueue(), { Range(0, 9) });
ASSERT_EQ(cache.getFileSegmentsNum(), 1);
ASSERT_EQ(cache.getUsedCacheSize(), 10);
std::cerr << "Step 2\n";
{
/// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
auto holder = cache.getOrSet(key, 5, 10, {});
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 2);
assertRange(4, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
assertRange(5, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[1]);
segments[1]->completeWithoutState();
assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::EMPTY });
download(get(holder, 1));
assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::DOWNLOADED });
increasePriority(holder);
}
/// Current cache: [__________][_____]
/// ^ ^^ ^
/// 0 910 14
assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
ASSERT_EQ(cache.getUsedCacheSize(), 15);
std::cerr << "Step 3\n";
/// Get [9, 9]
{
auto holder = cache.getOrSet(key, 9, 1, {}); /// Get [9, 9]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 1);
assertRange(7, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
auto holder = cache.getOrSet(key, 9, 1, {});
assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
increasePriority(holder);
}
assertEqual(cache.dumpQueue(), { Range(10, 14), Range(0, 9) });
/// Get [9, 10]
assertEqual(cache.getOrSet(key, 9, 2, {}),
{ Range(0, 9), Range(10, 14) },
{ State::DOWNLOADED, State::DOWNLOADED });
/// Get [10, 10]
{
auto holder = cache.getOrSet(key, 10, 1, {});
assertEqual(holder, { Range(10, 14) }, { State::DOWNLOADED });
increasePriority(holder);
}
assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14) });
ASSERT_EQ(cache.getFileSegmentsNum(), 2);
ASSERT_EQ(cache.getUsedCacheSize(), 15);
std::cerr << "Step 4\n";
{
auto holder = cache.getOrSet(key, 17, 4, {});
download(holder); /// Get [17, 20]
increasePriority(holder);
}
{
auto holder = cache.getOrSet(key, 9, 2, {}); /// Get [9, 10]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 2);
assertRange(8, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
assertRange(9, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
auto holder = cache.getOrSet(key, 24, 3, {});
download(holder); /// Get [24, 26]
increasePriority(holder);
}
{
auto holder = cache.getOrSet(key, 10, 1, {}); /// Get [10, 10]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 1);
assertRange(10, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
auto holder = cache.getOrSet(key, 27, 1, {});
download(holder); /// Get [27, 27]
increasePriority(holder);
}
complete(cache_base_path, cache.getOrSet(key, 17, 4, {})); /// Get [17, 20]
complete(cache_base_path, cache.getOrSet(key, 24, 3, {})); /// Get [24, 26]
/// completeWithState(cache.getOrSet(key, 27, 1, false)); /// Get [27, 27]
/// Current cache: [__________][_____] [____] [___][]
/// ^ ^^ ^ ^ ^ ^ ^^^
/// 0 910 14 17 20 24 2627
///
ASSERT_EQ(cache.getFileSegmentsNum(), 4);
ASSERT_EQ(cache.getUsedCacheSize(), 22);
assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(17, 20), Range(24, 26), Range(27, 27) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14), Range(17, 20), Range(24, 26), Range(27, 27) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 23);
std::cerr << "Step 5\n";
{
auto holder = cache.getOrSet(key, 0, 26, {}); /// Get [0, 25]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 6);
assertRange(11, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED);
assertRange(12, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
/// Missing [15, 16] should be added in cache.
assertRange(13, segments[2], DB::FileSegment::Range(15, 16), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[2]);
segments[2]->completeWithoutState();
assertRange(14, segments[3], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED);
/// New [21, 23], but will not be added in cache because of elements limit (5)
assertRange(15, segments[4], DB::FileSegment::Range(21, 23), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[4]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_FALSE(segments[4]->reserve(1));
assertRange(16, segments[5], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
/// Current cache: [__________][_____][ ][____] [___]
/// ^ ^ ^
/// 0 20 24
///
assertEqual(holder,
{ Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
download(get(holder, 2)); /// [27, 27] was evicted.
assertEqual(holder,
{ Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
assertDownloadFails(get(holder, 4));
assertEqual(holder,
{ Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DETACHED, State::DOWNLOADED });
/// Range [27, 27] must be evicted in previous getOrSet [0, 25].
/// Let's not invalidate pointers to returned segments from range [0, 25] and
/// as max elements size is reached, next attempt to put something in cache should fail.
/// This will also check that [27, 27] was indeed evicted.
auto holder2 = cache.getOrSet(key, 27, 1, {});
assertEqual(holder2, { Range(27, 27) }, { State::EMPTY });
assertDownloadFails(holder2->front());
assertEqual(holder2, { Range(27, 27) }, { State::DETACHED });
auto holder1 = cache.getOrSet(key, 27, 1, {});
auto segments_1 = fromHolder(holder1); /// Get [27, 27]
ASSERT_EQ(segments_1.size(), 1);
assertRange(17, segments_1[0], DB::FileSegment::Range(27, 27), DB::FileSegment::State::EMPTY);
auto holder3 = cache.getOrSet(key, 28, 3, {});
assertEqual(holder3, { Range(28, 30) }, { State::EMPTY });
assertDownloadFails(holder3->front());
assertEqual(holder3, { Range(28, 30) }, { State::DETACHED });
increasePriority(holder);
increasePriority(holder2);
increasePriority(holder3);
}
/// Current cache: [__________][_____][ ][____] [___]
/// ^ ^ ^
/// 0 20 24
///
assertEqual(cache.getSnapshot(key), { Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(24, 26) });
assertEqual(cache.dumpQueue(), { Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(24, 26) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 24);
std::cerr << "Step 6\n";
{
auto holder = cache.getOrSet(key, 12, 10, {}); /// Get [12, 21]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 4);
assertRange(18, segments[0], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED);
assertRange(19, segments[1], DB::FileSegment::Range(15, 16), DB::FileSegment::State::DOWNLOADED);
assertRange(20, segments[2], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED);
assertRange(21, segments[3], DB::FileSegment::Range(21, 21), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[3]);
segments[3]->completeWithoutState();
ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED);
assertEqual(holder,
{ Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });
download(get(holder, 3));
assertEqual(holder,
{ Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED });
increasePriority(holder);
}
/// Current cache: [_____][__][____][_] [___]
/// ^ ^ ^ ^ ^
/// 10 17 21 24 26
assertEqual(cache.getSnapshot(key), { Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21), Range(24, 26) });
assertEqual(cache.dumpQueue(), { Range(24, 26), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 15);
std::cerr << "Step 7\n";
{
auto holder = cache.getOrSet(key, 23, 5, {}); /// Get [23, 28]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 3);
assertRange(22, segments[0], DB::FileSegment::Range(23, 23), DB::FileSegment::State::EMPTY);
assertRange(23, segments[1], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(24, segments[2], DB::FileSegment::Range(27, 27), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments[0]);
prepareAndDownload(cache_base_path, segments[2]);
segments[0]->completeWithoutState();
segments[2]->completeWithoutState();
auto holder = cache.getOrSet(key, 23, 5, {}); /// Get [23, 27]
assertEqual(holder,
{ Range(23, 23), Range(24, 26), Range(27, 27) },
{ State::EMPTY, State::DOWNLOADED, State::EMPTY });
download(get(holder, 0));
download(get(holder, 2));
increasePriority(holder);
}
/// Current cache: [____][_] [][___][__]
/// ^ ^ ^^^ ^^ ^
/// 17 21 2324 26 28
{
auto holder5 = cache.getOrSet(key, 2, 3, {}); /// Get [2, 4]
auto s5 = fromHolder(holder5);
ASSERT_EQ(s5.size(), 1);
assertRange(25, s5[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::EMPTY);
auto holder1 = cache.getOrSet(key, 30, 2, {}); /// Get [30, 31]
auto s1 = fromHolder(holder1);
ASSERT_EQ(s1.size(), 1);
assertRange(26, s1[0], DB::FileSegment::Range(30, 31), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(s5[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, s5[0]);
prepareAndDownload(cache_base_path, s1[0]);
s5[0]->completeWithoutState();
s1[0]->completeWithoutState();
/// Current cache: [___] [_][___][_] [__]
/// ^ ^ ^ ^ ^ ^ ^ ^
/// 2 4 23 24 26 27 30 31
auto holder2 = cache.getOrSet(key, 23, 1, {}); /// Get [23, 23]
auto s2 = fromHolder(holder2);
ASSERT_EQ(s2.size(), 1);
auto holder3 = cache.getOrSet(key, 24, 3, {}); /// Get [24, 26]
auto s3 = fromHolder(holder3);
ASSERT_EQ(s3.size(), 1);
auto holder4 = cache.getOrSet(key, 27, 1, {}); /// Get [27, 27]
auto s4 = fromHolder(holder4);
ASSERT_EQ(s4.size(), 1);
/// All cache is now unreleasable because pointers are still hold
auto holder6 = cache.getOrSet(key, 0, 40, {});
auto f = fromHolder(holder6);
ASSERT_EQ(f.size(), 9);
assertRange(27, f[0], DB::FileSegment::Range(0, 1), DB::FileSegment::State::EMPTY);
assertRange(28, f[2], DB::FileSegment::Range(5, 22), DB::FileSegment::State::EMPTY);
assertRange(29, f[6], DB::FileSegment::Range(28, 29), DB::FileSegment::State::EMPTY);
assertRange(30, f[8], DB::FileSegment::Range(32, 39), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(f[0]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(f[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(f[6]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(f[8]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_FALSE(f[0]->reserve(1));
ASSERT_FALSE(f[2]->reserve(1));
ASSERT_FALSE(f[6]->reserve(1));
ASSERT_FALSE(f[8]->reserve(1));
}
/// 17 21 2324 26 27
assertEqual(cache.getSnapshot(key), { Range(17, 20), Range(21, 21), Range(23, 23), Range(24, 26), Range(27, 27) });
assertEqual(cache.dumpQueue(), { Range(17, 20), Range(21, 21), Range(23, 23), Range(24, 26), Range(27, 27) });
ASSERT_EQ(cache.getFileSegmentsNum(), 5);
ASSERT_EQ(cache.getUsedCacheSize(), 10);
std::cerr << "Step 8\n";
{
auto holder = cache.getOrSet(key, 2, 3, {}); /// Get [2, 4]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 1);
assertRange(31, segments[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
assertEqual(holder, { Range(2, 4) }, { State::EMPTY });
auto holder2 = cache.getOrSet(key, 30, 2, {}); /// Get [30, 31]
assertEqual(holder2, { Range(30, 31) }, { State::EMPTY });
download(get(holder, 0));
download(get(holder2, 0));
auto holder3 = cache.getOrSet(key, 23, 1, {}); /// Get [23, 23]
assertEqual(holder3, { Range(23, 23) }, { State::DOWNLOADED });
auto holder4 = cache.getOrSet(key, 24, 3, {}); /// Get [24, 26]
assertEqual(holder4, { Range(24, 26) }, { State::DOWNLOADED });
auto holder5 = cache.getOrSet(key, 27, 1, {}); /// Get [27, 27]
assertEqual(holder5, { Range(27, 27) }, { State::DOWNLOADED });
auto holder6 = cache.getOrSet(key, 0, 40, {});
assertEqual(holder6,
{ Range(0, 1), Range(2, 4), Range(5, 22), Range(23, 23), Range(24, 26), Range(27, 27), Range(28, 29), Range(30, 31), Range(32, 39) },
{ State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY });
assertDownloadFails(get(holder6, 0));
assertDownloadFails(get(holder6, 2));
assertDownloadFails(get(holder6, 6));
assertDownloadFails(get(holder6, 8));
increasePriority(holder);
increasePriority(holder2);
increasePriority(holder3);
increasePriority(holder4);
increasePriority(holder5);
increasePriority(holder6);
}
/// Current cache: [___] [_][___][_] [__]
/// ^ ^ ^ ^ ^ ^ ^ ^
/// 2 4 23 24 26 27 30 31
assertEqual(cache.getSnapshot(key), { Range(2, 4), Range(23, 23), Range(24, 26), Range(27, 27), Range(30, 31) });
assertEqual(cache.dumpQueue(), { Range(2, 4), Range(23, 23), Range(24, 26), Range(27, 27), Range(30, 31) });
std::cerr << "Step 9\n";
/// Get [2, 4]
{
auto holder = cache.getOrSet(key, 2, 3, {});
assertEqual(holder, { Range(2, 4) }, { State::DOWNLOADED });
increasePriority(holder);
}
{
auto holder = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29]
auto segments = fromHolder(holder);
ASSERT_EQ(segments.size(), 3);
assertEqual(holder,
{ Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });
assertRange(32, segments[0], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(33, segments[1], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
assertRange(34, segments[2], DB::FileSegment::Range(28, 29), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADING);
auto & file_segment = get(holder, 2);
ASSERT_TRUE(file_segment.getOrSetDownloader() == FileSegment::getCallerId());
ASSERT_TRUE(file_segment.state() == State::DOWNLOADING);
bool lets_start_download = false;
std::mutex mutex;
@ -403,16 +451,13 @@ TEST_F(FileCacheTest, get)
chassert(&DB::CurrentThread::get() == &thread_status_1);
DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);
auto holder_2 = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29] once again.
auto segments_2 = fromHolder(holder_2);
ASSERT_EQ(segments.size(), 3);
auto holder2 = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29] once again.
assertEqual(holder2,
{ Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADING });
assertRange(35, segments_2[0], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(36, segments_2[1], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
assertRange(37, segments_2[2], DB::FileSegment::Range(28, 29), DB::FileSegment::State::DOWNLOADING);
ASSERT_TRUE(segments[2]->getOrSetDownloader() != DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADING);
auto & file_segment2 = get(holder2, 2);
ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId());
{
std::lock_guard lock(mutex);
@ -420,8 +465,8 @@ TEST_F(FileCacheTest, get)
}
cv.notify_one();
segments_2[2]->wait();
ASSERT_TRUE(segments_2[2]->state() == DB::FileSegment::State::DOWNLOADED);
file_segment2.wait(file_segment2.range().left);
ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED);
});
{
@ -429,35 +474,34 @@ TEST_F(FileCacheTest, get)
cv.wait(lock, [&]{ return lets_start_download; });
}
prepareAndDownload(cache_base_path, segments[2]);
segments[2]->completeWithoutState();
ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED);
download(file_segment);
ASSERT_TRUE(file_segment.state() == State::DOWNLOADED);
other_1.join();
increasePriority(holder);
}
/// Current cache: [___] [___][_][__][__]
/// ^ ^ ^ ^ ^^ ^^ ^
/// 2 4 24 26 27 2930 31
assertEqual(cache.getSnapshot(key), { Range(2, 4), Range(24, 26), Range(27, 27), Range(28, 29), Range(30, 31) });
assertEqual(cache.dumpQueue(), { Range(30, 31), Range(2, 4), Range(24, 26), Range(27, 27), Range(28, 29) });
std::cerr << "Step 10\n";
{
/// Now let's check the similar case but getting ERROR state after segment->wait(), when
/// state is changed not manually via segment->completeWithState(state) but from destructor of holder
/// and notify_all() is also called from destructor of holder.
std::optional<DB::FileSegmentsHolder> holder;
holder.emplace(cache.getOrSet(key, 3, 23, {})); /// Get [3, 25]
auto holder = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25]
assertEqual(holder,
{ Range(2, 4), Range(5, 23), Range(24, 26) },
{ State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
auto segments = fromHolder(*holder);
ASSERT_EQ(segments.size(), 3);
assertRange(38, segments[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
assertRange(39, segments[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::EMPTY);
ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segments[1]->state() == DB::FileSegment::State::DOWNLOADING);
assertRange(40, segments[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
auto & file_segment = get(holder, 1);
ASSERT_TRUE(file_segment.getOrSetDownloader() == FileSegment::getCallerId());
ASSERT_TRUE(file_segment.state() == State::DOWNLOADING);
bool lets_start_download = false;
std::mutex mutex;
@ -472,16 +516,13 @@ TEST_F(FileCacheTest, get)
chassert(&DB::CurrentThread::get() == &thread_status_1);
DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);
auto holder_2 = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] once again
auto segments_2 = fromHolder(*holder);
ASSERT_EQ(segments_2.size(), 3);
auto holder2 = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] once again
/// Verify the holder obtained in this thread (holder2), not the one owned by the main thread.
assertEqual(holder2,
    { Range(2, 4), Range(5, 23), Range(24, 26) },
    { State::DOWNLOADED, State::DOWNLOADING, State::DOWNLOADED });
assertRange(41, segments_2[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
assertRange(42, segments_2[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::DOWNLOADING);
assertRange(43, segments_2[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
ASSERT_TRUE(segments_2[1]->getDownloader() != DB::FileSegment::getCallerId());
ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::DOWNLOADING);
auto & file_segment2 = get(holder, 1);
ASSERT_TRUE(file_segment2.getDownloader() != FileSegment::getCallerId());
{
std::lock_guard lock(mutex);
@ -489,13 +530,10 @@ TEST_F(FileCacheTest, get)
}
cv.notify_one();
segments_2[1]->wait();
printRanges(segments_2);
ASSERT_TRUE(segments_2[1]->state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED);
ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId());
prepareAndDownload(cache_base_path, segments_2[1]);
segments_2[1]->completeWithoutState();
file_segment2.wait(file_segment2.range().left);
ASSERT_TRUE(file_segment2.state() == DB::FileSegment::State::PARTIALLY_DOWNLOADED);
ASSERT_TRUE(file_segment2.getOrSetDownloader() == DB::FileSegment::getCallerId());
download(file_segment2);
});
{
@ -505,8 +543,7 @@ TEST_F(FileCacheTest, get)
holder.reset();
other_1.join();
printRanges(segments);
ASSERT_TRUE(segments[1]->state() == DB::FileSegment::State::DOWNLOADED);
ASSERT_TRUE(file_segment.state() == DB::FileSegment::State::DOWNLOADED);
}
}
@ -514,55 +551,103 @@ TEST_F(FileCacheTest, get)
/// ^ ^^ ^ ^^ ^ ^
/// 2 45 24 2627 28 29
std::cerr << "Step 11\n";
{
/// Test LRUCache::restore().
auto cache2 = DB::FileCache(settings);
cache2.initialize();
auto key = cache2.hash("key1");
auto key = cache2.createKeyForPath("key1");
auto holder1 = cache2.getOrSet(key, 2, 28, {}); /// Get [2, 29]
auto segments1 = fromHolder(holder1);
ASSERT_EQ(segments1.size(), 5);
assertRange(44, segments1[0], DB::FileSegment::Range(2, 4), DB::FileSegment::State::DOWNLOADED);
assertRange(45, segments1[1], DB::FileSegment::Range(5, 23), DB::FileSegment::State::DOWNLOADED);
assertRange(45, segments1[2], DB::FileSegment::Range(24, 26), DB::FileSegment::State::DOWNLOADED);
assertRange(46, segments1[3], DB::FileSegment::Range(27, 27), DB::FileSegment::State::DOWNLOADED);
assertRange(47, segments1[4], DB::FileSegment::Range(28, 29), DB::FileSegment::State::DOWNLOADED);
/// Get [2, 29]
assertEqual(cache2.getOrSet(key, 2, 28, {}),
{ Range(2, 4), Range(5, 23), Range(24, 26), Range(27, 27), Range(28, 29) },
{ State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED });
}
std::cerr << "Step 12\n";
{
/// Test max file segment size
auto settings2 = settings;
settings2.max_file_segment_size = 10;
settings2.base_path = caches_dir / "cache2";
fs::create_directories(settings2.base_path);
auto cache2 = DB::FileCache(settings2);
cache2.initialize();
auto key = cache2.hash("key1");
auto key = cache2.createKeyForPath("key1");
auto holder1 = cache2.getOrSet(key, 0, 25, {}); /// Get [0, 24]
auto segments1 = fromHolder(holder1);
ASSERT_EQ(segments1.size(), 3);
assertRange(48, segments1[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::EMPTY);
assertRange(49, segments1[1], DB::FileSegment::Range(10, 19), DB::FileSegment::State::EMPTY);
assertRange(50, segments1[2], DB::FileSegment::Range(20, 24), DB::FileSegment::State::EMPTY);
/// Get [0, 24]
assertEqual(cache2.getOrSet(key, 0, 25, {}),
{ Range(0, 9), Range(10, 19), Range(20, 24) },
{ State::EMPTY, State::EMPTY, State::EMPTY });
}
std::cerr << "Step 13\n";
{
/// Test delayed cleanup
auto cache = FileCache(settings);
cache.initialize();
cache.cleanup();
const auto key = cache.createKeyForPath("key10");
const auto key_path = cache.getPathInLocalCache(key);
cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));
download(cache.getOrSet(key, 0, 10, {}));
ASSERT_EQ(cache.getUsedCacheSize(), 10);
ASSERT_TRUE(fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular)));
cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
ASSERT_TRUE(fs::exists(key_path));
ASSERT_TRUE(!fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular)));
cache.cleanup();
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));
}
std::cerr << "Step 14\n";
{
/// Test background thread delayed cleanup
auto settings2{settings};
settings2.delayed_cleanup_interval_ms = 0;
auto cache = DB::FileCache(settings2);
cache.initialize();
const auto key = cache.createKeyForPath("key10");
const auto key_path = cache.getPathInLocalCache(key);
cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
ASSERT_TRUE(!fs::exists(key_path));
ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));
download(cache.getOrSet(key, 0, 10, {}));
ASSERT_EQ(cache.getUsedCacheSize(), 10);
ASSERT_TRUE(fs::exists(key_path));
cache.removeAllReleasable();
ASSERT_EQ(cache.getUsedCacheSize(), 0);
sleepForSeconds(2);
ASSERT_TRUE(!fs::exists(key_path));
}
}
TEST_F(FileCacheTest, writeBuffer)
{
DB::FileCacheSettings settings;
FileCacheSettings settings;
settings.max_size = 100;
settings.max_elements = 5;
settings.max_file_segment_size = 5;
settings.base_path = cache_base_path;
DB::FileCache cache(settings);
FileCache cache(settings);
cache.initialize();
auto write_to_cache = [&cache](const String & key, const Strings & data, bool flush)
@ -571,10 +656,13 @@ TEST_F(FileCacheTest, writeBuffer)
segment_settings.kind = FileSegmentKind::Temporary;
segment_settings.unbounded = true;
auto holder = cache.set(cache.hash(key), 0, 3, segment_settings);
EXPECT_EQ(holder.file_segments.size(), 1);
auto & segment = holder.file_segments.front();
WriteBufferToFileSegment out(segment.get());
auto cache_key = cache.createKeyForPath(key);
auto holder = cache.set(cache_key, 0, 3, segment_settings);
/// The same is done in TemporaryDataOnDisk::createStreamToCacheFile.
std::filesystem::create_directories(cache.getPathInLocalCache(cache_key));
EXPECT_EQ(holder->size(), 1);
auto & segment = holder->front();
WriteBufferToFileSegment out(&segment);
std::list<std::thread> threads;
std::mutex mu;
for (const auto & s : data)
@ -600,18 +688,18 @@ TEST_F(FileCacheTest, writeBuffer)
std::vector<fs::path> file_segment_paths;
{
auto holder = write_to_cache("key1", {"abc", "defg"}, false);
file_segment_paths.emplace_back(holder.file_segments.front()->getPathInLocalCache());
file_segment_paths.emplace_back(holder->front().getPathInLocalCache());
ASSERT_EQ(fs::file_size(file_segment_paths.back()), 7);
ASSERT_TRUE(holder.file_segments.front()->range() == FileSegment::Range(0, 7));
ASSERT_TRUE(holder->front().range() == FileSegment::Range(0, 7));
ASSERT_EQ(cache.getUsedCacheSize(), 7);
{
auto holder2 = write_to_cache("key2", {"1", "22", "333", "4444", "55555"}, true);
file_segment_paths.emplace_back(holder2.file_segments.front()->getPathInLocalCache());
file_segment_paths.emplace_back(holder2->front().getPathInLocalCache());
ASSERT_EQ(fs::file_size(file_segment_paths.back()), 15);
ASSERT_TRUE(holder2.file_segments.front()->range() == FileSegment::Range(0, 15));
ASSERT_TRUE(holder2->front().range() == FileSegment::Range(0, 15));
ASSERT_EQ(cache.getUsedCacheSize(), 22);
}
ASSERT_FALSE(fs::exists(file_segment_paths.back()));
@ -668,17 +756,16 @@ TEST_F(FileCacheTest, temporaryData)
auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, 0);
auto some_data_holder = file_cache.getOrSet(file_cache.hash("some_data"), 0, 5_KiB, CreateFileSegmentSettings{});
auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, CreateFileSegmentSettings{});
{
auto segments = fromHolder(some_data_holder);
ASSERT_EQ(segments.size(), 5);
for (auto & segment : segments)
ASSERT_EQ(some_data_holder->size(), 5);
for (auto & segment : *some_data_holder)
{
ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId());
ASSERT_TRUE(segment->reserve(segment->range().size()));
download(cache_base_path, segment);
segment->completeWithoutState();
download(*segment);
segment->complete();
}
}

View File

@ -18,6 +18,7 @@ NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes()
{"cache_name", std::make_shared<DataTypeString>()},
{"cache_base_path", std::make_shared<DataTypeString>()},
{"cache_path", std::make_shared<DataTypeString>()},
{"key", std::make_shared<DataTypeString>()},
{"file_segment_range_begin", std::make_shared<DataTypeUInt64>()},
{"file_segment_range_end", std::make_shared<DataTypeUInt64>()},
{"size", std::make_shared<DataTypeUInt64>()},
@ -45,27 +46,27 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex
const auto & cache = cache_data->cache;
auto file_segments = cache->getSnapshot();
for (const auto & file_segment : file_segments)
for (const auto & file_segment : *file_segments)
{
res_columns[0]->insert(cache_name);
res_columns[1]->insert(cache->getBasePath());
/// Do not use `file_segment->getPathInLocalCache` here because it will lead to nullptr dereference
/// (because file_segments in getSnapshot don't have the `cache` field set)
res_columns[2]->insert(
cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind()));
res_columns[2]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset(), file_segment->getKind()));
res_columns[3]->insert(file_segment->key().toString());
const auto & range = file_segment->range();
res_columns[3]->insert(range.left);
res_columns[4]->insert(range.right);
res_columns[5]->insert(range.size());
res_columns[6]->insert(FileSegment::stateToString(file_segment->state()));
res_columns[7]->insert(file_segment->getHitsCount());
res_columns[8]->insert(file_segment->getRefCount());
res_columns[9]->insert(file_segment->getDownloadedSize());
res_columns[10]->insert(file_segment->isPersistent());
res_columns[11]->insert(toString(file_segment->getKind()));
res_columns[12]->insert(file_segment->isUnbound());
res_columns[4]->insert(range.left);
res_columns[5]->insert(range.right);
res_columns[6]->insert(range.size());
res_columns[7]->insert(FileSegment::stateToString(file_segment->state()));
res_columns[8]->insert(file_segment->getHitsCount());
res_columns[9]->insert(file_segment->getRefCount());
res_columns[10]->insert(file_segment->getDownloadedSize(false));
res_columns[11]->insert(file_segment->isPersistent());
res_columns[12]->insert(toString(file_segment->getKind()));
res_columns[13]->insert(file_segment->isUnbound());
}
}
}

View File

@ -1,4 +1,5 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeEnum.h>
@ -55,15 +56,38 @@ namespace
if constexpr (std::is_same_v<Factory, FunctionFactory>)
{
if (factory.isAlias(name))
{
res_columns[6]->insertDefault();
res_columns[7]->insertDefault();
res_columns[8]->insertDefault();
res_columns[9]->insertDefault();
res_columns[10]->insertDefault();
res_columns[11]->insertDefault();
}
else
res_columns[6]->insert(factory.getDocumentation(name).description);
{
auto documentation = factory.getDocumentation(name);
res_columns[6]->insert(documentation.description);
res_columns[7]->insertDefault();
res_columns[8]->insertDefault();
res_columns[9]->insertDefault();
res_columns[10]->insert(documentation.examplesAsString());
res_columns[11]->insert(documentation.categoriesAsString());
}
}
else
{
res_columns[6]->insertDefault();
res_columns[7]->insertDefault();
res_columns[8]->insertDefault();
res_columns[9]->insertDefault();
res_columns[10]->insertDefault();
res_columns[11]->insertDefault();
}
}
}
std::vector<std::pair<String, Int8>> getOriginEnumsAndValues()
{
return std::vector<std::pair<String, Int8>>{
@ -83,6 +107,11 @@ NamesAndTypesList StorageSystemFunctions::getNamesAndTypes()
{"create_query", std::make_shared<DataTypeString>()},
{"origin", std::make_shared<DataTypeEnum8>(getOriginEnumsAndValues())},
{"description", std::make_shared<DataTypeString>()},
{"syntax", std::make_shared<DataTypeString>()},
{"arguments", std::make_shared<DataTypeString>()},
{"returned_value", std::make_shared<DataTypeString>()},
{"examples", std::make_shared<DataTypeString>()},
{"categories", std::make_shared<DataTypeString>()}
};
}

View File

@ -82,7 +82,7 @@ Pipe StorageSystemRemoteDataPaths::read(
if (cache)
{
auto cache_paths = cache->tryGetCachePaths(cache->hash(object.getPathKeyForCache()));
auto cache_paths = cache->tryGetCachePaths(cache->createKeyForPath(object.getPathKeyForCache()));
col_cache_paths->insert(Array(cache_paths.begin(), cache_paths.end()));
}
else

View File

@ -55,52 +55,58 @@
<type>cache</type>
<disk>s3_disk</disk>
<path>s3_cache/</path>
<max_size>2147483648</max_size>
<max_size>128Mi</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache>
<s3_cache_2>
<type>cache</type>
<disk>s3_disk_2</disk>
<path>s3_cache_2/</path>
<max_size>2Gi</max_size>
<max_size>128Mi</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<max_file_segment_size>100Mi</max_file_segment_size>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_2>
<s3_cache_3>
<type>cache</type>
<disk>s3_disk_3</disk>
<path>s3_disk_3_cache/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<data_cache_max_size>22548578304</data_cache_max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_cache_hits_threshold>1</enable_cache_hits_threshold>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_3>
<s3_cache_4>
<type>cache</type>
<disk>s3_disk_4</disk>
<path>s3_cache_4/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<enable_filesystem_query_cache_limit>1</enable_filesystem_query_cache_limit>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_4>
<s3_cache_5>
<type>cache</type>
<disk>s3_disk_5</disk>
<path>s3_cache_5/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_5>
<s3_cache_6>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_6/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<enable_bypass_cache_with_threashold>1</enable_bypass_cache_with_threashold>
<bypass_cache_threashold>100</bypass_cache_threashold>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_6>
<s3_cache_small>
<type>cache</type>
@ -108,15 +114,17 @@
<path>s3_cache_small/</path>
<max_size>1000</max_size>
<do_not_evict_index_and_mark_files>1</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small>
<s3_cache_small_segment_size>
<type>cache</type>
<disk>s3_disk_6</disk>
<path>s3_cache_small_segment_size/</path>
<max_size>22548578304</max_size>
<max_size>128Mi</max_size>
<max_file_segment_size>10Ki</max_file_segment_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<cache_on_write_operations>1</cache_on_write_operations>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_small_segment_size>
<!-- local disks -->
<local_disk>
@ -139,6 +147,7 @@
<max_size>22548578304</max_size>
<cache_on_write_operations>1</cache_on_write_operations>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</local_cache>
<local_cache_2>
<type>cache</type>
@ -146,6 +155,7 @@
<path>local_cache_2/</path>
<max_size>22548578304</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</local_cache_2>
<local_cache_3>
<type>cache</type>
@ -155,6 +165,7 @@
<cache_on_write_operations>1</cache_on_write_operations>
<enable_cache_hits_threshold>1</enable_cache_hits_threshold>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</local_cache_3>
<!-- multi layer cache -->
<s3_cache_multi>
@ -163,6 +174,7 @@
<path>s3_cache_multi/</path>
<max_size>22548578304</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_multi>
<s3_cache_multi_2>
<type>cache</type>
@ -170,6 +182,7 @@
<path>s3_cache_multi_2/</path>
<max_size>22548578304</max_size>
<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>
<delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
</s3_cache_multi_2>
</disks>
<policies>

View File

@ -11,6 +11,7 @@
<skip_access_check>true</skip_access_check>
<!-- Avoid extra retries to speed up tests -->
<retry_attempts>0</retry_attempts>
<connect_timeout_ms>20000</connect_timeout_ms>
</s3>
<s3_retryable>
<type>s3</type>
@ -20,6 +21,7 @@
<secret_access_key>minio123</secret_access_key>
<!-- ClickHouse starts earlier than the custom S3 endpoint. Skip the access check to avoid failing on start-up -->
<skip_access_check>true</skip_access_check>
<connect_timeout_ms>20000</connect_timeout_ms>
</s3_retryable>
<s3_no_retries>
<type>s3</type>
@ -32,6 +34,7 @@
<!-- Avoid extra retries to speed up tests -->
<s3_retry_attempts>1</s3_retry_attempts>
<s3_max_single_read_retries>1</s3_max_single_read_retries>
<connect_timeout_ms>20000</connect_timeout_ms>
</s3_no_retries>
<default/>
</disks>

View File

@ -281,7 +281,12 @@ CREATE TABLE system.functions
`alias_to` String,
`create_query` String,
`origin` Enum8('System' = 0, 'SQLUserDefined' = 1, 'ExecutableUserDefined' = 2),
`description` String
`description` String,
`syntax` String,
`arguments` String,
`returned_value` String,
`examples` String,
`categories` String
)
ENGINE = SystemFunctions
COMMENT 'SYSTEM TABLE is built on the fly.'

View File

@ -40,6 +40,8 @@
{'key1':1111,'key2':2222,'key5':500,'key6':600}
{'key1':1112,'key2':2224,'key5':500,'key6':600}
{'key1':1113,'key2':2226,'key5':500,'key6':600}
{'key5':500,'key6':600}
{'key5':500,'key6':600}
1
1
1

View File

@ -11,6 +11,8 @@ SELECT mapApply((k, v) -> tuple(v + 9223372036854775806), col) FROM table_map; -
SELECT mapConcat(col, map('key5', 500), map('key6', 600)) FROM table_map ORDER BY id;
SELECT mapConcat(col, materialize(map('key5', 500)), map('key6', 600)) FROM table_map ORDER BY id;
SELECT concat(map('key5', 500), map('key6', 600));
SELECT map('key5', 500) || map('key6', 600);
SELECT mapExists((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id;
SELECT mapExists((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id;

View File

@ -1,10 +1,60 @@
Using storage policy: s3_cache
0 79 80
0 745 746
0 745 746
0 745 746
0
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Using storage policy: local_cache
0 79 80
0 745 746
0 745 746
0 745 746
0
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache
Expect cache
DOWNLOADED 0 79 80
DOWNLOADED 0 745 746
2
Expect no cache

View File

@ -9,34 +9,69 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
for STORAGE_POLICY in 's3_cache' 'local_cache'; do
echo "Using storage policy: $STORAGE_POLICY"
${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES"
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES test_02240_storage_policy"
${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy SELECT number, toString(number) FROM numbers(100)"
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false"
${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)"
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
echo 'Expect cache'
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size"
${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache";
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null"
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE"
echo 'Expect no cache'
${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache"
done

View File

@ -1,2 +1,2 @@
2147483648 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0
2147483648 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0
134217728 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0
134217728 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0