Merge branch 'master' into enable-memory-overcommit

Author: mergify[bot] (committed by GitHub)
Date:   2022-05-05 13:29:20 +00:00
Commit: 6d3a20b2a2
13 changed files with 189 additions and 74 deletions


@ -25,7 +25,6 @@
* Introduce format `ProtobufList` (all records as repeated messages in out Protobuf). Closes [#16436](https://github.com/ClickHouse/ClickHouse/issues/16436). [#35152](https://github.com/ClickHouse/ClickHouse/pull/35152) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add `h3PointDistM`, `h3PointDistKm`, `h3PointDistRads`, `h3GetRes0Indexes`, `h3GetPentagonIndexes` functions. [#34568](https://github.com/ClickHouse/ClickHouse/pull/34568) ([Bharat Nallan](https://github.com/bharatnc)).
* Add `toLastDayOfMonth` function which rounds up a date or date with time to the last day of the month. [#33501](https://github.com/ClickHouse/ClickHouse/issues/33501). [#34394](https://github.com/ClickHouse/ClickHouse/pull/34394) ([Habibullah Oladepo](https://github.com/holadepo)).
* New aggregation function groupSortedArray to obtain an array of first N values. [#34055](https://github.com/ClickHouse/ClickHouse/pull/34055) ([palegre-tiny](https://github.com/palegre-tiny)).
* Added load balancing setting for \[Zoo\]Keeper client. Closes [#29617](https://github.com/ClickHouse/ClickHouse/issues/29617). [#30325](https://github.com/ClickHouse/ClickHouse/pull/30325) ([小路](https://github.com/nicelulu)).
* Add a new kind of row policy named `simple`. Before this PR we had two kinds of row policies: `permissive` and `restrictive`. A `simple` row policy adds a new filter on a table without any of the side effects that permissive and restrictive policies have. [#35345](https://github.com/ClickHouse/ClickHouse/pull/35345) ([Vitaly Baranov](https://github.com/vitlibar)).
* Added an ability to specify cluster secret in replicated database. [#35333](https://github.com/ClickHouse/ClickHouse/pull/35333) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).


@ -190,7 +190,7 @@ def process_benchmark_results(args):
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
for result in sorted(os.listdir(results_root)):
result_file = os.path.join(results_root, result)
logging.debug(f"Reading benchmark result from {result_file}")
logging.info(f"Reading benchmark result from {result_file}")
with open(result_file, "r") as f:
result = json.loads(f.read())
for item in result:


@ -16,6 +16,7 @@
M(QueryTimeMicroseconds, "Total time of all queries.") \
M(SelectQueryTimeMicroseconds, "Total time of SELECT queries.") \
M(InsertQueryTimeMicroseconds, "Total time of INSERT queries.") \
M(OtherQueryTimeMicroseconds, "Total time of queries that are not SELECT or INSERT.") \
M(FileOpen, "Number of files opened.") \
M(Seek, "Number of times the 'lseek' function was called.") \
M(ReadBufferFromFileDescriptorRead, "Number of reads (read/pread) from a file descriptor. Does not include sockets.") \
@ -240,18 +241,23 @@
M(NotCreatedLogEntryForMutation, "Log entry to mutate parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \
\
M(S3ReadMicroseconds, "Time of GET and HEAD requests to S3 storage.") \
M(S3ReadBytes, "Read bytes (incoming) in GET and HEAD requests to S3 storage.") \
M(S3ReadRequestsCount, "Number of GET and HEAD requests to S3 storage.") \
M(S3ReadRequestsErrors, "Number of non-throttling errors in GET and HEAD requests to S3 storage.") \
M(S3ReadRequestsThrottling, "Number of 429 and 503 errors in GET and HEAD requests to S3 storage.") \
M(S3ReadRequestsRedirects, "Number of redirects in GET and HEAD requests to S3 storage.") \
\
M(S3WriteMicroseconds, "Time of POST, DELETE, PUT and PATCH requests to S3 storage.") \
M(S3WriteBytes, "Write bytes (outgoing) in POST, DELETE, PUT and PATCH requests to S3 storage.") \
M(S3WriteRequestsCount, "Number of POST, DELETE, PUT and PATCH requests to S3 storage.") \
M(S3WriteRequestsErrors, "Number of non-throttling errors in POST, DELETE, PUT and PATCH requests to S3 storage.") \
M(S3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to S3 storage.") \
M(S3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to S3 storage.") \
\
M(ReadBufferFromS3Microseconds, "Time spent reading from S3.") \
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \
\
M(WriteBufferFromS3Bytes, "Bytes written to S3.") \
\
M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \
\
M(RemoteFSReadMicroseconds, "Time of reading from remote filesystem.") \
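
The `M(NAME, DOC)` entries above are part of an X-macro style counter table that gets expanded several times: once into event identifiers, once into documentation strings, and once into the counter storage. A minimal sketch of that technique, assuming a hypothetical `APPLY_FOR_EVENTS` list and two sample events (not the real ClickHouse definitions):

```
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>

/// Hypothetical counter table in the same X-macro style as the list above.
#define APPLY_FOR_EVENTS(M) \
    M(FileOpen, "Number of files opened.") \
    M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.")

namespace ProfileEventsSketch
{
    /// First expansion: the event identifiers.
    enum Event : size_t
    {
#define M(NAME, DOC) NAME,
        APPLY_FOR_EVENTS(M)
#undef M
        END
    };

    /// Second expansion: the documentation strings, indexed by the enum.
    constexpr const char * documentation[] =
    {
#define M(NAME, DOC) DOC,
        APPLY_FOR_EVENTS(M)
#undef M
    };

    /// The counters themselves are plain atomics indexed by the enum.
    std::atomic<uint64_t> counters[END] {};

    void increment(Event event, uint64_t amount = 1)
    {
        counters[event].fetch_add(amount, std::memory_order_relaxed);
    }
}

int main()
{
    ProfileEventsSketch::increment(ProfileEventsSketch::FileOpen);
    std::cout << ProfileEventsSketch::documentation[ProfileEventsSketch::FileOpen] << '\n';
}
```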


@ -122,7 +122,7 @@ void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfigura
for (auto setting : settings.all())
{
const auto & name = setting.getName();
if (config.has(name))
if (config.has(name) && !setting.isObsolete())
{
throw Exception(fmt::format("A setting '{}' appeared at top level in config {}."
" But it is user-level setting that should be located in users.xml inside <profiles> section for specific profile."

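With the added `isObsolete()` check, an obsolete setting that still appears at the top level of the server config is now skipped instead of triggering the exception above. A rough, self-contained sketch of that validation under assumed types (`Setting` and the function signature here are placeholders, not the real ClickHouse interfaces):

```
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

struct Setting
{
    std::string name;
    bool obsolete = false;   /// kept only for backward compatibility
};

/// Reject user-level settings found at the top level of the config,
/// except obsolete ones, mirroring the added isObsolete() condition.
void checkNoSettingNamesAtTopLevel(const std::set<std::string> & top_level_keys,
                                   const std::vector<Setting> & user_settings)
{
    for (const auto & setting : user_settings)
    {
        if (top_level_keys.count(setting.name) && !setting.obsolete)
            throw std::runtime_error(
                "A setting '" + setting.name + "' appeared at top level in config, "
                "but it is a user-level setting that should be located in users.xml "
                "inside the <profiles> section for a specific profile.");
    }
}

int main()
{
    /// No exception: the colliding setting is marked obsolete in this toy example.
    checkNoSettingNamesAtTopLevel({"some_old_setting"}, {{"some_old_setting", true}});
}
```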

@ -19,9 +19,9 @@
namespace ProfileEvents
{
extern const Event S3ReadMicroseconds;
extern const Event S3ReadBytes;
extern const Event S3ReadRequestsErrors;
extern const Event ReadBufferFromS3Microseconds;
extern const Event ReadBufferFromS3Bytes;
extern const Event ReadBufferFromS3RequestsErrors;
extern const Event ReadBufferSeekCancelConnection;
}
@ -121,14 +121,14 @@ bool ReadBufferFromS3::nextImpl()
/// Try to read a next portion of data.
next_result = impl->next();
watch.stop();
ProfileEvents::increment(ProfileEvents::S3ReadMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Microseconds, watch.elapsedMicroseconds());
break;
}
catch (const Exception & e)
{
watch.stop();
ProfileEvents::increment(ProfileEvents::S3ReadMicroseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::S3ReadRequestsErrors, 1);
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Microseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3RequestsErrors, 1);
LOG_DEBUG(
log,
@ -157,7 +157,7 @@ bool ReadBufferFromS3::nextImpl()
BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl`
ProfileEvents::increment(ProfileEvents::S3ReadBytes, working_buffer.size());
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, working_buffer.size());
offset += working_buffer.size();
return true;
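
The renamed events are updated in a pattern worth noting: the elapsed time of the retried `impl->next()` call is accounted whether it succeeds or throws, and failures additionally bump an error counter. A self-contained sketch of that accounting, assuming placeholder counters and a hypothetical `readWithProfiling` helper rather than the real `ProfileEvents::increment` machinery:

```
#include <chrono>
#include <cstdint>
#include <exception>
#include <iostream>

uint64_t read_microseconds = 0;   /// stands in for ReadBufferFromS3Microseconds
uint64_t read_errors = 0;         /// stands in for ReadBufferFromS3RequestsErrors

template <typename ReadFn>
bool readWithProfiling(ReadFn && do_read, int max_attempts = 3)
{
    for (int attempt = 0; attempt < max_attempts; ++attempt)
    {
        const auto start = std::chrono::steady_clock::now();
        try
        {
            bool has_data = do_read();
            /// Success: account the elapsed time and stop retrying.
            read_microseconds += std::chrono::duration_cast<std::chrono::microseconds>(
                std::chrono::steady_clock::now() - start).count();
            return has_data;
        }
        catch (const std::exception &)
        {
            /// Failure: the elapsed time is still accounted, plus one error, before retrying.
            read_microseconds += std::chrono::duration_cast<std::chrono::microseconds>(
                std::chrono::steady_clock::now() - start).count();
            ++read_errors;
        }
    }
    return false;
}

int main()
{
    int calls = 0;
    bool ok = readWithProfiling([&] { if (++calls < 2) throw std::exception(); return true; });
    std::cout << ok << ' ' << read_errors << '\n';   /// prints "1 1"
}
```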


@ -20,7 +20,7 @@
namespace ProfileEvents
{
extern const Event S3WriteBytes;
extern const Event WriteBufferFromS3Bytes;
extern const Event RemoteFSCacheDownloadBytes;
}
@ -121,7 +121,7 @@ void WriteBufferFromS3::nextImpl()
}
}
ProfileEvents::increment(ProfileEvents::S3WriteBytes, offset());
ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, offset());
last_part_size += offset();


@ -79,6 +79,7 @@ namespace ProfileEvents
extern const Event QueryTimeMicroseconds;
extern const Event SelectQueryTimeMicroseconds;
extern const Event InsertQueryTimeMicroseconds;
extern const Event OtherQueryTimeMicroseconds;
}
namespace DB
@ -801,6 +802,10 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
{
ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, query_time);
}
else
{
ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, query_time);
}
element.query_duration_ms = info.elapsed_seconds * 1000;
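
With the new `else` branch, every query's elapsed time is attributed to exactly one of the three buckets, so `SelectQueryTimeMicroseconds`, `InsertQueryTimeMicroseconds` and `OtherQueryTimeMicroseconds` together should add up to `QueryTimeMicroseconds`. A tiny sketch of that classification with placeholder names:

```
#include <cstdint>

enum class QueryKind { Select, Insert, Other };

uint64_t select_query_time_us = 0;
uint64_t insert_query_time_us = 0;
uint64_t other_query_time_us = 0;   /// the bucket added by this change

void accountQueryTime(QueryKind kind, uint64_t query_time_us)
{
    if (kind == QueryKind::Select)
        select_query_time_us += query_time_us;
    else if (kind == QueryKind::Insert)
        insert_query_time_us += query_time_us;
    else
        other_query_time_us += query_time_us;   /// previously this time was not attributed anywhere
}

int main()
{
    accountQueryTime(QueryKind::Other, 1500);   /// e.g. a SHOW or ALTER query
}
```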


@ -182,24 +182,21 @@ String StorageS3Source::DisclosedGlobIterator::next()
class StorageS3Source::KeysIterator::Impl
{
public:
explicit Impl(const std::vector<String> & keys_) : keys(keys_), keys_iter(keys.begin())
explicit Impl(const std::vector<String> & keys_) : keys(keys_)
{
}
String next()
{
std::lock_guard lock(mutex);
if (keys_iter == keys.end())
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= keys.size())
return "";
auto key = *keys_iter;
++keys_iter;
return key;
return keys[current_index];
}
private:
std::mutex mutex;
Strings keys;
Strings::iterator keys_iter;
std::atomic_size_t index = 0;
};
StorageS3Source::KeysIterator::KeysIterator(const std::vector<String> & keys_) : pimpl(std::make_shared<StorageS3Source::KeysIterator::Impl>(keys_))
@ -211,6 +208,39 @@ String StorageS3Source::KeysIterator::next()
return pimpl->next();
}
class StorageS3Source::ReadTasksIterator::Impl
{
public:
explicit Impl(const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_)
: read_tasks(read_tasks_), new_read_tasks_callback(new_read_tasks_callback_)
{
}
String next()
{
size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
if (current_index >= read_tasks.size())
return new_read_tasks_callback();
return read_tasks[current_index];
}
private:
std::atomic_size_t index = 0;
std::vector<String> read_tasks;
ReadTaskCallback new_read_tasks_callback;
};
StorageS3Source::ReadTasksIterator::ReadTasksIterator(
const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_)
: pimpl(std::make_shared<StorageS3Source::ReadTasksIterator::Impl>(read_tasks_, new_read_tasks_callback_))
{
}
String StorageS3Source::ReadTasksIterator::next()
{
return pimpl->next();
}
Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
{
for (const auto & virtual_column : requested_virtual_columns)
@ -580,7 +610,15 @@ StorageS3::StorageS3(
updateS3Configuration(context_, s3_configuration);
if (columns_.empty())
{
auto columns = getTableStructureFromDataImpl(format_name, s3_configuration, compression_method, distributed_processing_, is_key_with_globs, format_settings, context_);
auto columns = getTableStructureFromDataImpl(
format_name,
s3_configuration,
compression_method,
distributed_processing_,
is_key_with_globs,
format_settings,
context_,
&read_tasks_used_in_schema_inference);
storage_metadata.setColumns(columns);
}
else
@ -598,13 +636,20 @@ StorageS3::StorageS3(
virtual_columns = getVirtualsForStorage(columns, default_virtuals);
}
std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(const S3Configuration & s3_configuration, const std::vector<String> & keys, bool is_key_with_globs, bool distributed_processing, ContextPtr local_context)
std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(
const S3Configuration & s3_configuration,
const std::vector<String> & keys,
bool is_key_with_globs,
bool distributed_processing,
ContextPtr local_context,
const std::vector<String> & read_tasks)
{
if (distributed_processing)
{
return std::make_shared<StorageS3Source::IteratorWrapper>(
[callback = local_context->getReadTaskCallback()]() -> String {
return callback();
[read_tasks_iterator = std::make_shared<StorageS3Source::ReadTasksIterator>(read_tasks, local_context->getReadTaskCallback())]() -> String
{
return read_tasks_iterator->next();
});
}
else if (is_key_with_globs)
@ -653,7 +698,7 @@ Pipe StorageS3::read(
requested_virtual_columns.push_back(virtual_column);
}
std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper = createFileIterator(s3_configuration, keys, is_key_with_globs, distributed_processing, local_context);
std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper = createFileIterator(s3_configuration, keys, is_key_with_globs, distributed_processing, local_context, read_tasks_used_in_schema_inference);
ColumnsDescription columns_description;
Block block_for_format;
@ -947,13 +992,15 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
bool distributed_processing,
bool is_key_with_globs,
const std::optional<FormatSettings> & format_settings,
ContextPtr ctx)
ContextPtr ctx,
std::vector<String> * read_keys_in_distributed_processing)
{
auto file_iterator = createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx);
ReadBufferIterator read_buffer_iterator = [&, first = true]() mutable -> std::unique_ptr<ReadBuffer>
{
auto key = (*file_iterator)();
if (key.empty())
{
if (first)
@ -966,6 +1013,9 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
return nullptr;
}
if (distributed_processing && read_keys_in_distributed_processing)
read_keys_in_distributed_processing->push_back(key);
first = false;
return wrapReadBufferWithCompressionMethod(
std::make_unique<ReadBufferFromS3>(


@ -55,6 +55,18 @@ public:
std::shared_ptr<Impl> pimpl;
};
class ReadTasksIterator
{
public:
ReadTasksIterator(const std::vector<String> & read_tasks_, const ReadTaskCallback & new_read_tasks_callback_);
String next();
private:
class Impl;
/// shared_ptr to have copy constructor
std::shared_ptr<Impl> pimpl;
};
using IteratorWrapper = std::function<String()>;
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
@ -200,9 +212,17 @@ private:
ASTPtr partition_by;
bool is_key_with_globs = false;
std::vector<String> read_tasks_used_in_schema_inference;
static void updateS3Configuration(ContextPtr, S3Configuration &);
static std::shared_ptr<StorageS3Source::IteratorWrapper> createFileIterator(const S3Configuration & s3_configuration, const std::vector<String> & keys, bool is_key_with_globs, bool distributed_processing, ContextPtr local_context);
static std::shared_ptr<StorageS3Source::IteratorWrapper> createFileIterator(
const S3Configuration & s3_configuration,
const std::vector<String> & keys,
bool is_key_with_globs,
bool distributed_processing,
ContextPtr local_context,
const std::vector<String> & read_tasks = {});
static ColumnsDescription getTableStructureFromDataImpl(
const String & format,
@ -211,7 +231,8 @@ private:
bool distributed_processing,
bool is_key_with_globs,
const std::optional<FormatSettings> & format_settings,
ContextPtr ctx);
ContextPtr ctx,
std::vector<String> * read_keys_in_distributed_processing = nullptr);
bool isColumnOriented() const override;
};
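
Both iterators declared above share one pattern: a pre-collected list of keys or read tasks is handed out lock-free through an atomic index (replacing the earlier mutex plus iterator in `KeysIterator`), and `ReadTasksIterator` additionally falls back to the read-task callback once its list is drained. A standalone sketch of that pattern, with illustrative names rather than the real StorageS3 classes:

```
#include <atomic>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class TaskIterator
{
public:
    using Callback = std::function<std::string()>;

    TaskIterator(std::vector<std::string> tasks_, Callback fallback_)
        : tasks(std::move(tasks_)), fallback(std::move(fallback_)) {}

    std::string next()
    {
        /// fetch_add hands every caller a distinct slot without taking a mutex.
        size_t current = index.fetch_add(1, std::memory_order_relaxed);
        if (current < tasks.size())
            return tasks[current];
        /// List exhausted: ask the callback for more work; an empty string means "done".
        return fallback ? fallback() : std::string{};
    }

private:
    std::vector<std::string> tasks;
    Callback fallback;
    std::atomic<size_t> index{0};
};

int main()
{
    TaskIterator it({"key1", "key2"}, [] { return std::string{}; });
    for (std::string key = it.next(); !key.empty(); key = it.next())
        std::cout << key << '\n';
}
```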


@ -32,25 +32,27 @@ def cluster():
init_list = {
"ReadBufferFromS3Bytes": 0,
"S3ReadMicroseconds": 0,
"S3ReadBytes": 0,
"ReadBufferFromS3Microseconds": 0,
"ReadBufferFromS3RequestsErrors": 0,
"S3ReadRequestsCount": 0,
"S3ReadRequestsErrorsTotal": 0,
"S3ReadRequestsErrors503": 0,
"S3ReadRequestsRedirects": 0,
"S3WriteMicroseconds": 0,
"S3WriteBytes": 0,
"S3WriteRequestsCount": 0,
"S3WriteRequestsErrorsTotal": 0,
"S3WriteRequestsErrors503": 0,
"S3WriteRequestsRedirects": 0,
"WriteBufferFromS3Bytes": 0,
}
def get_s3_events(instance):
result = init_list.copy()
events = instance.query(
"SELECT event,value FROM system.events WHERE event LIKE 'S3%'"
"SELECT event,value FROM system.events WHERE event LIKE '%S3%'"
).split("\n")
for event in events:
ev = event.split("\t")
@ -108,7 +110,7 @@ def get_query_stat(instance, hint):
for event in events:
ev = event.split("\t")
if len(ev) == 2:
if ev[0].startswith("S3"):
if "S3" in ev[0]:
result[ev[0]] += int(ev[1])
return result
@ -151,7 +153,9 @@ def test_profile_events(cluster):
stat1 = get_query_stat(instance, query1)
for metric in stat1:
assert stat1[metric] == metrics1[metric] - metrics0[metric]
assert metrics1["S3WriteBytes"] - metrics0["S3WriteBytes"] == size1
assert (
metrics1["WriteBufferFromS3Bytes"] - metrics0["WriteBufferFromS3Bytes"] == size1
)
query2 = "INSERT INTO test_s3.test_s3 FORMAT Values"
instance.query(query2 + " (1,1)")
@ -171,7 +175,10 @@ def test_profile_events(cluster):
stat2 = get_query_stat(instance, query2)
for metric in stat2:
assert stat2[metric] == metrics2[metric] - metrics1[metric]
assert metrics2["S3WriteBytes"] - metrics1["S3WriteBytes"] == size2 - size1
assert (
metrics2["WriteBufferFromS3Bytes"] - metrics1["WriteBufferFromS3Bytes"]
== size2 - size1
)
query3 = "SELECT * from test_s3.test_s3"
assert instance.query(query3) == "1\t1\n"


@ -96,22 +96,6 @@
26 27 28
0 0 0
0 0 0
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
0 0 0
1 2 3
4 5 6
@ -124,14 +108,6 @@
26 27 28
0 0 0
0 0 0
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
0 0 0
1 2 3
4 5 6
@ -144,14 +120,54 @@
26 27 28
0 0 0
0 0 0
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
0 0 0
1 2 3
4 5 6
7 8 9
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
0 0 0
1 2 3
4 5 6
7 8 9
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
0 0 0
1 2 3
4 5 6
7 8 9
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
0 0 0
1 2 3
4 5 6
7 8 9
10 11 12
13 14 15
16 17 18
20 21 22
23 24 25
26 27 28
0 0 0
0 0 0
0 0 0
1 2 3
4 5 6


@ -2,16 +2,27 @@ ClickHouse website is built alongside its documentation via [docs/tools](https:
# How to quickly test the main page of the website
```
# If you have an old OS distribution,
# Run this from repository root:
docker run -it --rm --network host --volume $(pwd):/workspace ubuntu:20.04 /bin/bash
cd workspace/docs/tools
apt update
apt install sudo python3 python3-pip git
pip3 install -r requirements.txt
git config --global --add safe.directory /workspace
./build.py --skip-multi-page --skip-blog --skip-docs --livereload 8080
```
```
cd ../docs/tools
sudo apt install python-3 pip
pip3 install -r requirements.txt
# This is needed only when documentation is included
sudo npm install -g purify-css amphtml-validator
virtualenv build
./build.py --skip-multi-page --skip-single-page --skip-amp --skip-blog --skip-git-log --skip-docs --livereload 8080
./build.py --skip-multi-page --skip-blog --skip-docs --livereload 8080
# Open the web browser and go to http://localhost:8080/
```
@ -19,11 +30,11 @@ virtualenv build
# How to quickly test the blog
```
./build.py --skip-multi-page --skip-single-page --skip-amp --skip-git-log --skip-docs --livereload 8080
./build.py --skip-multi-page --skip-docs --livereload 8080
```
# How to quickly test the broken links in docs
```
./build.py --skip-multi-page --skip-amp --skip-blog --skip-git-log --lang en --livereload 8080
./build.py --skip-multi-page --skip-blog --lang en --livereload 8080
```


@ -48,7 +48,7 @@
[0.261, 0.237, 0.231],
[0.029, 0.013, 0.014],
[0.017, 0.013, 0.011],
[0.003, 0.002, 0.003],
[0.003, 0.002, 0.003]
]
}
]