mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Merge pull request #16425 from filimonov/minumum
Fix typos reported by codespell
This commit is contained in:
commit
ba34145817
@ -409,7 +409,7 @@
|
||||
|
||||
## ClickHouse release 20.6
|
||||
|
||||
### ClickHouse release v20.6.3.28-stable
|
||||
### ClickHouse release v20.6.3.28-stable
|
||||
|
||||
#### New Feature
|
||||
|
||||
@ -2362,7 +2362,7 @@ No changes compared to v20.4.3.16-stable.
|
||||
* `Live View` table engine refactoring. [#8519](https://github.com/ClickHouse/ClickHouse/pull/8519) ([vzakaznikov](https://github.com/vzakaznikov))
|
||||
* Add additional checks for external dictionaries created from DDL-queries. [#8127](https://github.com/ClickHouse/ClickHouse/pull/8127) ([alesapin](https://github.com/alesapin))
|
||||
* Fix error `Column ... already exists` while using `FINAL` and `SAMPLE` together, e.g. `select count() from table final sample 1/2`. Fixes [#5186](https://github.com/ClickHouse/ClickHouse/issues/5186). [#7907](https://github.com/ClickHouse/ClickHouse/pull/7907) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
|
||||
* Now table the first argument of `joinGet` function can be table indentifier. [#7707](https://github.com/ClickHouse/ClickHouse/pull/7707) ([Amos Bird](https://github.com/amosbird))
|
||||
* Now the first argument of the `joinGet` function can be a table identifier. [#7707](https://github.com/ClickHouse/ClickHouse/pull/7707) ([Amos Bird](https://github.com/amosbird))
|
||||
* Allow using `MaterializedView` with subqueries above `Kafka` tables. [#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))
|
||||
* Now background moves between disks run in a separate thread pool. [#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon))
|
||||
* `SYSTEM RELOAD DICTIONARY` now executes synchronously. [#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([Vitaly Baranov](https://github.com/vitlibar))
|
||||
|
@ -51,7 +51,7 @@ struct StringRef
|
||||
};
|
||||
|
||||
/// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/
|
||||
/// nullptr can't be used because the StringRef values are used in SipHash's pointer arithmetics
|
||||
/// nullptr can't be used because the StringRef values are used in SipHash's pointer arithmetic
|
||||
/// and the UBSan thinks that something like nullptr + 8 is UB.
|
||||
constexpr const inline char empty_string_ref_addr{};
|
||||
constexpr const inline StringRef EMPTY_STRING_REF{&empty_string_ref_addr, 0};
|
||||
|
@ -11,11 +11,11 @@ CFLAGS (GLOBAL -DDBMS_VERSION_MAJOR=${VERSION_MAJOR})
|
||||
CFLAGS (GLOBAL -DDBMS_VERSION_MINOR=${VERSION_MINOR})
|
||||
CFLAGS (GLOBAL -DDBMS_VERSION_PATCH=${VERSION_PATCH})
|
||||
CFLAGS (GLOBAL -DVERSION_FULL=\"\\\"${VERSION_FULL}\\\"\")
|
||||
CFLAGS (GLOBAL -DVERSION_MAJOR=${VERSION_MAJOR})
|
||||
CFLAGS (GLOBAL -DVERSION_MINOR=${VERSION_MINOR})
|
||||
CFLAGS (GLOBAL -DVERSION_MAJOR=${VERSION_MAJOR})
|
||||
CFLAGS (GLOBAL -DVERSION_MINOR=${VERSION_MINOR})
|
||||
CFLAGS (GLOBAL -DVERSION_PATCH=${VERSION_PATCH})
|
||||
|
||||
# TODO: not supported yet, not sure if ya.make supports arithmetics.
|
||||
# TODO: not supported yet, not sure if ya.make supports arithmetic.
|
||||
CFLAGS (GLOBAL -DVERSION_INTEGER=0)
|
||||
|
||||
CFLAGS (GLOBAL -DVERSION_NAME=\"\\\"${VERSION_NAME}\\\"\")
|
||||
|
@ -192,7 +192,7 @@ set(SRCS
|
||||
${HDFS3_SOURCE_DIR}/common/FileWrapper.h
|
||||
)
|
||||
|
||||
# old kernels (< 3.17) doens't have SYS_getrandom. Always use POSIX implementation to have better compatibility
|
||||
# old kernels (< 3.17) don't have SYS_getrandom. Always use POSIX implementation to have better compatibility
|
||||
set_source_files_properties(${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp PROPERTIES COMPILE_FLAGS "-DBOOST_UUID_RANDOM_PROVIDER_FORCE_POSIX=1")
|
||||
|
||||
# target
|
||||
|
@ -63,7 +63,7 @@ function configure
|
||||
# Make copies of the original db for both servers. Use hardlinks instead
|
||||
# of copying to save space. Before that, remove preprocessed configs and
|
||||
# system tables, because sharing them between servers with hardlinks may
|
||||
# lead to weird effects.
|
||||
# lead to weird effects.
|
||||
rm -r left/db ||:
|
||||
rm -r right/db ||:
|
||||
rm -r db0/preprocessed_configs ||:
|
||||
@ -82,7 +82,7 @@ function restart
|
||||
export MALLOC_CONF="confirm_conf:true"
|
||||
|
||||
set -m # Spawn servers in their own process groups
|
||||
|
||||
|
||||
left/clickhouse-server --config-file=left/config/config.xml \
|
||||
-- --path left/db --user_files_path left/db/user_files \
|
||||
&>> left-server-log.log &
|
||||
@ -208,7 +208,7 @@ function run_tests
|
||||
echo test "$test_name"
|
||||
|
||||
# Don't profile if we're past the time limit.
|
||||
# Use awk because bash doesn't support floating point arithmetics.
|
||||
# Use awk because bash doesn't support floating point arithmetic.
|
||||
profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }")
|
||||
|
||||
TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
|
||||
@ -541,10 +541,10 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
|
||||
as select
|
||||
abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
|
||||
abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
|
||||
|
||||
|
||||
not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail,
|
||||
not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
|
||||
|
||||
|
||||
left, right, diff, stat_threshold,
|
||||
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
|
||||
query_metric_stats.test test, query_metric_stats.query_index query_index,
|
||||
@ -767,7 +767,7 @@ create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as
|
||||
-- The threshold for 2) is significantly larger than the threshold for 1), to
|
||||
-- avoid jitter.
|
||||
create view shortness
|
||||
as select
|
||||
as select
|
||||
(test, query_index) in
|
||||
(select * from file('analyze/marked-short-queries.tsv', TSV,
|
||||
'test text, query_index int'))
|
||||
|
@ -80,4 +80,4 @@ Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argu
|
||||
## See Also {#see-also}
|
||||
|
||||
- [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator
|
||||
- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type convertion functions
|
||||
- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions
|
||||
|
@ -551,7 +551,7 @@ formatReadableTimeDelta(column[, maximum_unit])
|
||||
**Parameters**
|
||||
|
||||
- `column` — A column with numeric time delta.
|
||||
- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years.
|
||||
- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years.
|
||||
|
||||
Example:
|
||||
|
||||
@ -1584,7 +1584,7 @@ isDecimalOverflow(d, [p])
|
||||
**Parameters**
|
||||
|
||||
- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `p` — precision. Optional. If omitted, the initial presicion of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
|
||||
- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using this parameter could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
|
||||
|
||||
**Returned values**
|
||||
|
||||
|
@ -169,7 +169,7 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL
|
||||
**See Also**
|
||||
|
||||
- [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type
|
||||
- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type convertion functions
|
||||
- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions
|
||||
|
||||
## Logical Negation Operator {#logical-negation-operator}
|
||||
|
||||
|
@ -121,7 +121,7 @@ Defines storage time for values. Can be specified only for MergeTree-family tabl
|
||||
|
||||
## Column Compression Codecs {#codecs}
|
||||
|
||||
By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) section of a server configuration.
|
||||
By default, ClickHouse applies the `lz4` compression method. For `MergeTree`-engine family you can change the default compression method in the [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) section of a server configuration.
|
||||
|
||||
You can also define the compression method for each individual column in the `CREATE TABLE` query.
|
||||
|
||||
@ -138,7 +138,7 @@ ENGINE = <Engine>
|
||||
...
|
||||
```
|
||||
|
||||
The `Default` codec can be specified to reference default compression which may dependend on different settings (and properties of data) in runtime.
|
||||
The `Default` codec can be specified to reference default compression which may depend on different settings (and properties of data) in runtime.
|
||||
Example: `value UInt64 CODEC(Default)` — the same as lack of codec specification.
|
||||
|
||||
Also you can remove current CODEC from the column and use default compression from config.xml:
|
||||
@ -149,7 +149,7 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default);
|
||||
|
||||
Codecs can be combined in a pipeline, for example, `CODEC(Delta, Default)`.
|
||||
|
||||
To select the best codec combination for you project, pass benchmarks similar to described in the Altinity [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) article. One thing to note is that codec can't be applied for ALIAS column type.
|
||||
To select the best codec combination for your project, run benchmarks similar to those described in the Altinity [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) article. One thing to note is that a codec can't be applied to the ALIAS column type.
|
||||
|
||||
!!! warning "Warning"
|
||||
You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility.
|
||||
|
@ -152,7 +152,7 @@ void LocalServer::tryInitPath()
|
||||
default_path = parent_folder / fmt::format("clickhouse-local-{}-{}-{}", getpid(), time(nullptr), randomSeed());
|
||||
|
||||
if (exists(default_path))
|
||||
throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Unsuccessfull attempt to create working directory: {} exist!", default_path.string());
|
||||
throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Unsuccessful attempt to create working directory: {} exist!", default_path.string());
|
||||
|
||||
create_directory(default_path);
|
||||
temporary_directory_to_delete = default_path;
|
||||
|
@ -270,7 +270,7 @@
|
||||
This parameter is mandatory and cannot be empty.
|
||||
roles - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
|
||||
If no roles are specified, user will not be able to perform any actions after authentication.
|
||||
If any of the listed roles is not defined locally at the time of authentication, the authenthication attept
|
||||
If any of the listed roles is not defined locally at the time of authentication, the authentication attempt
|
||||
will fail as if the provided password was incorrect.
|
||||
Example:
|
||||
<ldap>
|
||||
|
@ -585,7 +585,7 @@ void IAccessStorage::throwInvalidPassword()
|
||||
|
||||
void IAccessStorage::throwCannotAuthenticate(const String & user_name)
|
||||
{
|
||||
/// We use the same message for all authentification failures because we don't want to give away any unnecessary information for security reasons,
|
||||
/// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons,
|
||||
/// only the log will show the exact reason.
|
||||
throw Exception(user_name + ": Authentication failed: password is incorrect or there is no user with such name", ErrorCodes::AUTHENTICATION_FAILED);
|
||||
}
|
||||
|
@ -296,7 +296,7 @@ public:
|
||||
{
|
||||
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
|
||||
if constexpr (is_big_int_v<T>)
|
||||
// is data_to empty? we should probaly use std::vector::insert then
|
||||
// is data_to empty? we should probably use std::vector::insert then
|
||||
for (auto it = this->data(place).value.begin(); it != this->data(place).value.end(); it++)
|
||||
data_to.push_back(*it);
|
||||
else
|
||||
|
@ -82,7 +82,7 @@ public:
|
||||
* @see DB::ColumnUnique
|
||||
*
|
||||
* The most common example uses https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/ columns.
|
||||
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contigous memory
|
||||
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory
|
||||
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
|
||||
* index in the dictionary, which can be used to operate with the indices column.
|
||||
*/
|
||||
|
@ -5,15 +5,15 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Helper class, that recieves file descriptor and does fsync for it in destructor.
|
||||
/// Helper class, that receives file descriptor and does fsync for it in destructor.
|
||||
/// It's used to keep descriptor open, while doing some operations with it, and do fsync at the end.
|
||||
/// Guarantees of sequence 'close-reopen-fsync' may depend on kernel version.
|
||||
/// Source: linux-fsdevel mailing-list https://marc.info/?l=linux-fsdevel&m=152535409207496
|
||||
class FileSyncGuard
|
||||
{
|
||||
public:
|
||||
/// NOTE: If you have already opened descriptor, it's preffered to use
|
||||
/// this constructor instead of construnctor with path.
|
||||
/// NOTE: If you have already opened descriptor, it's preferred to use
|
||||
/// this constructor instead of constructor with path.
|
||||
FileSyncGuard(const DiskPtr & disk_, int fd_) : disk(disk_), fd(fd_) {}
|
||||
|
||||
FileSyncGuard(const DiskPtr & disk_, const String & path)
|
||||
|
@ -234,13 +234,13 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
|
||||
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);
|
||||
|
||||
job();
|
||||
/// job should be reseted before decrementing scheduled_jobs to
|
||||
/// job should be reset before decrementing scheduled_jobs to
|
||||
/// ensure that the Job is destroyed before wait() returns.
|
||||
job = {};
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
/// job should be reseted before decrementing scheduled_jobs to
|
||||
/// job should be reset before decrementing scheduled_jobs to
|
||||
/// ensure that the Job is destroyed before wait() returns.
|
||||
job = {};
|
||||
|
||||
|
@ -152,7 +152,7 @@ void TraceCollector::run()
|
||||
if (trace_log)
|
||||
{
|
||||
// time and time_in_microseconds are both being constructed from the same timespec so that the
|
||||
// times will be equal upto the precision of a second.
|
||||
// times will be equal up to the precision of a second.
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_REALTIME, &ts);
|
||||
|
||||
|
@ -1288,7 +1288,7 @@ void ZooKeeper::receiveEvent()
|
||||
response->removeRootPath(root_path);
|
||||
}
|
||||
|
||||
/// Instead of setting the watch in sendEvent, set it in receiveEvent becuase need to check the response.
|
||||
/// Instead of setting the watch in sendEvent, set it in receiveEvent because need to check the response.
|
||||
/// The watch shouldn't be set if the node does not exist and it will never exist like sequential ephemeral nodes.
|
||||
/// By using getData() instead of exists(), a watch won't be set if the node doesn't exist.
|
||||
if (request_info.watch)
|
||||
|
@ -87,7 +87,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
|
||||
else
|
||||
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
||||
|
||||
/// Default codec replaced with current default codec which may dependend on different
|
||||
/// Default codec replaced with current default codec which may depend on different
|
||||
/// settings (and properties of data) in runtime.
|
||||
CompressionCodecPtr result_codec;
|
||||
if (codec_family_name == DEFAULT_CODEC_NAME)
|
||||
|
@ -26,7 +26,7 @@ void ICompressionCodec::setCodecDescription(const String & codec_name, const AST
|
||||
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
|
||||
result->name = "CODEC";
|
||||
|
||||
/// Special case for codec Multiple, which doens't have name. It's just list
|
||||
/// Special case for codec Multiple, which doesn't have name. It's just list
|
||||
/// of other codecs.
|
||||
if (codec_name.empty())
|
||||
{
|
||||
|
@ -705,7 +705,7 @@ namespace MySQLReplication
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw ReplicationError("Position update with unsupport event", ErrorCodes::LOGICAL_ERROR);
|
||||
throw ReplicationError("Position update with unsupported event", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -390,7 +390,7 @@ class IColumn;
|
||||
M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \
|
||||
M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \
|
||||
M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \
|
||||
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precison are seen as String on ClickHouse's side.", 0) \
|
||||
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
|
||||
\
|
||||
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
|
||||
\
|
||||
|
@ -29,7 +29,7 @@ constexpr size_t min(size_t x, size_t y)
|
||||
}
|
||||
|
||||
/// @note There's no auto scale to larger big integer, only for integral ones.
|
||||
/// It's cause of (U)Int64 backward compatibilty and very big performance penalties.
|
||||
/// It's because of (U)Int64 backward compatibility and very big performance penalties.
|
||||
constexpr size_t nextSize(size_t size)
|
||||
{
|
||||
if (size < 8)
|
||||
|
@ -116,7 +116,7 @@ void DatabaseAtomic::dropTable(const Context &, const String & table_name, bool
|
||||
}
|
||||
tryRemoveSymlink(table_name);
|
||||
/// Remove the inner table (if any) to avoid deadlock
|
||||
/// (due to attemp to execute DROP from the worker thread)
|
||||
/// (due to attempt to execute DROP from the worker thread)
|
||||
if (auto * mv = dynamic_cast<StorageMaterializedView *>(table.get()))
|
||||
mv->dropInnerTable(no_delay);
|
||||
/// Notify DatabaseCatalog that table was dropped. It will remove table data in background.
|
||||
|
@ -11,7 +11,7 @@ class Context;
|
||||
class ASTStorage;
|
||||
|
||||
#define LIST_OF_CONNECTION_MYSQL_SETTINGS(M) \
|
||||
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precison are seen as String on ClickHouse's side.", 0) \
|
||||
M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \
|
||||
|
||||
/// Settings that should not change after the creation of a database.
|
||||
#define APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(M) \
|
||||
|
@ -326,7 +326,7 @@ struct DecimalBinaryOperation
|
||||
}
|
||||
|
||||
private:
|
||||
/// there's implicit type convertion here
|
||||
/// there's implicit type conversion here
|
||||
static NativeResultType apply(NativeResultType a, NativeResultType b)
|
||||
{
|
||||
if constexpr (can_overflow && check_overflow)
|
||||
|
@ -577,7 +577,7 @@ private:
|
||||
auto input_value = input_column->getDataAt(r);
|
||||
if constexpr (mode == CipherMode::RFC5116_AEAD_AES_GCM)
|
||||
{
|
||||
// empty plaintext results in empty ciphertext + tag, means there should be atleast tag_size bytes.
|
||||
// empty plaintext results in empty ciphertext + tag, means there should be at least tag_size bytes.
|
||||
if (input_value.size < tag_size)
|
||||
throw Exception(fmt::format("Encrypted data is too short: only {} bytes, "
|
||||
"should contain at least {} bytes of a tag.",
|
||||
|
@ -131,7 +131,7 @@ public:
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
/// Virtual call is Ok (neglible comparing to the rest of calculations).
|
||||
/// Virtual call is Ok (negligible comparing to the rest of calculations).
|
||||
Float64 value = arguments[0].column->getFloat64(i);
|
||||
|
||||
bool is_negative = value < 0;
|
||||
|
@ -22,7 +22,7 @@ namespace
|
||||
{
|
||||
|
||||
/// Returns 1 if a Decimal value has more digits than its Precision allows, 0 otherwise.
|
||||
/// Precision could be set as second argument or omitted. If ommited function uses Decimal presicion of the first argument.
|
||||
/// Precision could be set as second argument or omitted. If omitted function uses Decimal precision of the first argument.
|
||||
class FunctionIsDecimalOverflow : public IFunction
|
||||
{
|
||||
public:
|
||||
|
@ -166,7 +166,7 @@ void ThreadStatus::initPerformanceCounters()
|
||||
memory_tracker.setDescription("(for thread)");
|
||||
|
||||
// query_start_time_{microseconds, nanoseconds} are all constructed from the same time point
|
||||
// to ensure that they are all equal upto the precision of a second.
|
||||
// to ensure that they are all equal up to the precision of a second.
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
|
||||
query_start_time_nanoseconds = time_in_nanoseconds(now);
|
||||
|
@ -221,7 +221,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, Context & c
|
||||
|
||||
// all callers to onExceptionBeforeStart method construct the timespec for event_time and
|
||||
// event_time_microseconds from the same time point. So, it can be assumed that both of these
|
||||
// times are equal upto the precision of a second.
|
||||
// times are equal up to the precision of a second.
|
||||
elem.event_time = current_time_us / 1000000;
|
||||
elem.event_time_microseconds = current_time_us;
|
||||
elem.query_start_time = current_time_us / 1000000;
|
||||
@ -703,7 +703,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING;
|
||||
|
||||
// event_time and event_time_microseconds are being constructed from the same time point
|
||||
// to ensure that both the times will be equal upto the precision of a second.
|
||||
// to ensure that both the times will be equal up to the precision of a second.
|
||||
const auto time_now = std::chrono::system_clock::now();
|
||||
|
||||
elem.event_time = time_in_seconds(time_now);
|
||||
|
@ -927,7 +927,7 @@ void obfuscateQueries(
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Everyting else is kept as is.
|
||||
/// Everything else is kept as is.
|
||||
result.write(token.begin, token.size());
|
||||
}
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ public:
|
||||
Status prepare() override;
|
||||
void work() override;
|
||||
|
||||
/// Adds additional port fo totals.
|
||||
/// Adds additional port for totals.
|
||||
/// If added, totals will have been ready by the first generate() call (in totals chunk).
|
||||
InputPort * addTotalsPort();
|
||||
|
||||
|
@ -353,7 +353,7 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
|
||||
return checksum->second.file_size;
|
||||
}
|
||||
|
||||
String IMergeTreeDataPart::getColumnNameWithMinumumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const
|
||||
String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const
|
||||
{
|
||||
const auto & storage_columns = metadata_snapshot->getColumns().getAllPhysical();
|
||||
auto alter_conversions = storage.getAlterConversionsForPart(shared_from_this());
|
||||
|
@ -145,7 +145,7 @@ public:
|
||||
|
||||
/// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
|
||||
/// If no checksums are present returns the name of the first physically existing column.
|
||||
String getColumnNameWithMinumumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const;
|
||||
String getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const;
|
||||
|
||||
bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); }
|
||||
|
||||
|
@ -95,7 +95,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada
|
||||
*/
|
||||
if (!have_at_least_one_physical_column)
|
||||
{
|
||||
const auto minimum_size_column_name = part->getColumnNameWithMinumumCompressedSize(metadata_snapshot);
|
||||
const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(metadata_snapshot);
|
||||
columns.push_back(minimum_size_column_name);
|
||||
/// correctly report added column
|
||||
injected_columns.insert(columns.back());
|
||||
|
@ -298,7 +298,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
||||
|
||||
if (metadata_snapshot->hasAnyTTL() && merge_with_ttl_allowed && !ttl_merges_blocker.isCancelled())
|
||||
{
|
||||
/// TTL delete is prefered to recompression
|
||||
/// TTL delete is preferred to recompression
|
||||
TTLDeleteMergeSelector delete_ttl_selector(
|
||||
next_delete_ttl_merge_times_by_partition,
|
||||
current_time,
|
||||
|
@ -49,7 +49,7 @@ struct MergeTreeDataPartTTLInfos
|
||||
|
||||
TTLInfoMap recompression_ttl;
|
||||
|
||||
/// Return smalles max recompression TTL value
|
||||
/// Return the smallest max recompression TTL value
|
||||
time_t getMinimalMaxRecompressionTTL() const;
|
||||
|
||||
|
||||
|
@ -1517,7 +1517,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
|
||||
{
|
||||
/// In case when SELECT's predicate defines a single continuous interval of keys,
|
||||
/// we can use binary search algorithm to find the left and right endpoint key marks of such interval.
|
||||
/// The returned value is the minumum range of marks, containing all keys for which KeyCondition holds
|
||||
/// The returned value is the minimum range of marks, containing all keys for which KeyCondition holds
|
||||
|
||||
LOG_TRACE(log, "Running binary search on index range for part {} ({} marks)", part->name, marks_count);
|
||||
|
||||
|
@ -36,7 +36,7 @@ private:
|
||||
Poco::Logger * log;
|
||||
std::atomic<bool> need_stop {false};
|
||||
|
||||
// We need it besides `storage.is_readonly`, bacause `shutdown()` may be called many times, that way `storage.is_readonly` will not change.
|
||||
// We need it besides `storage.is_readonly`, because `shutdown()` may be called many times, that way `storage.is_readonly` will not change.
|
||||
bool incr_readonly = false;
|
||||
|
||||
/// The random data we wrote into `/replicas/me/is_active`.
|
||||
|
@ -84,7 +84,7 @@ public:
|
||||
time_t getTTLForPart(const IMergeSelector::Part & part) const override;
|
||||
|
||||
/// Checks that part's codec is not already equal to required codec
|
||||
/// according to recompression TTL. It doesn't make sence to assign such
|
||||
/// according to recompression TTL. It doesn't make sense to assign such
|
||||
/// merge.
|
||||
bool isTTLAlreadySatisfied(const IMergeSelector::Part & part) const override;
|
||||
private:
|
||||
|
@ -155,7 +155,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe()
|
||||
.onError([&](const char * message)
|
||||
{
|
||||
/* End up here either if channel ends up in an error state (then there will be resubscription) or consume call error, which
|
||||
* arises from queue settings mismatch or queue level error, which should not happen as noone else is supposed to touch them
|
||||
* arises from queue settings mismatch or queue level error, which should not happen as no one else is supposed to touch them
|
||||
*/
|
||||
LOG_ERROR(log, "Consumer failed on channel {}. Reason: {}", channel_id, message);
|
||||
wait_subscription.store(false);
|
||||
@ -173,16 +173,16 @@ bool ReadBufferFromRabbitMQConsumer::ackMessages()
|
||||
*/
|
||||
if (record_info.channel_id == channel_id && record_info.delivery_tag && record_info.delivery_tag > prev_tag)
|
||||
{
|
||||
/// Commit all received messages with delivery tags from last commited to last inserted
|
||||
/// Commit all received messages with delivery tags from last committed to last inserted
|
||||
if (!consumer_channel->ack(record_info.delivery_tag, AMQP::multiple))
|
||||
{
|
||||
LOG_ERROR(log, "Failed to commit messages with delivery tags from last commited to {} on channel {}",
|
||||
LOG_ERROR(log, "Failed to commit messages with delivery tags from last committed to {} on channel {}",
|
||||
record_info.delivery_tag, channel_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
prev_tag = record_info.delivery_tag;
|
||||
LOG_TRACE(log, "Consumer commited messages with deliveryTags up to {} on channel {}", record_info.delivery_tag, channel_id);
|
||||
LOG_TRACE(log, "Consumer committed messages with deliveryTags up to {} on channel {}", record_info.delivery_tag, channel_id);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -406,7 +406,7 @@ bool StorageRabbitMQ::restoreConnection(bool reconnecting)
|
||||
connection->close(); /// Connection might be unusable, but not closed
|
||||
|
||||
/* Connection is not closed immediately (firstly, all pending operations are completed, and then
|
||||
* an AMQP closing-handshake is performed). But cannot open a new connection untill previous one is properly closed
|
||||
* an AMQP closing-handshake is performed). But cannot open a new connection until previous one is properly closed
|
||||
*/
|
||||
while (!connection->closed() && ++cnt_retries != RETRIES_MAX)
|
||||
event_handler->iterateLoop();
|
||||
@ -731,7 +731,7 @@ bool StorageRabbitMQ::streamToViews()
|
||||
auto column_names = block_io.out->getHeader().getNames();
|
||||
auto sample_block = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID());
|
||||
|
||||
/* event_handler->connectionRunning() does not guarantee that connnection is not closed in case loop was not running before, but
|
||||
/* event_handler->connectionRunning() does not guarantee that connection is not closed in case loop was not running before, but
|
||||
* need to anyway start the loop to activate error callbacks and update connection state, because even checking with
|
||||
* connection->usable() will not give correct answer before callbacks are activated.
|
||||
*/
|
||||
|
@ -76,20 +76,20 @@ private:
|
||||
String channel_id;
|
||||
|
||||
/* payloads.queue:
|
||||
* - payloads are pushed to queue in countRow and poped by another thread in writingFunc, each payload gets into queue only once
|
||||
* - payloads are pushed to queue in countRow and popped by another thread in writingFunc, each payload gets into queue only once
|
||||
* returned.queue:
|
||||
* - payloads are pushed to queue:
|
||||
* 1) inside channel->onError() callback if channel becomes unusable and the record of pending acknowledgements from server
|
||||
* is non-empty.
|
||||
* 2) inside removeRecord() if received nack() - negative acknowledgement from the server that message failed to be written
|
||||
* to disk or it was unable to reach the queue.
|
||||
* - payloads are poped from the queue once republished
|
||||
* - payloads are popped from the queue once republished
|
||||
*/
|
||||
ConcurrentBoundedQueue<std::pair<UInt64, String>> payloads, returned;
|
||||
|
||||
/* Counter of current delivery on a current channel. Delivery tags are scoped per channel. The server attaches a delivery tag for each
|
||||
* published message - a serial number of delivery on current channel. Delivery tag is a way of server to notify publisher if it was
|
||||
* able or unable to process delivery, i.e. it sends back a responce with a corresponding delivery tag.
|
||||
* able or unable to process delivery, i.e. it sends back a response with a corresponding delivery tag.
|
||||
*/
|
||||
UInt64 delivery_tag = 0;
|
||||
|
||||
@ -100,7 +100,7 @@ private:
|
||||
*/
|
||||
bool wait_all = true;
|
||||
|
||||
/* false: untill writeSuffix is called
|
||||
/* false: until writeSuffix is called
|
||||
* true: means payloads.queue will not grow anymore
|
||||
*/
|
||||
std::atomic<UInt64> wait_num = 0;
|
||||
|
@ -21,8 +21,8 @@ def cluster():
|
||||
|
||||
def test_different_types(cluster):
|
||||
node = cluster.instances["node"]
|
||||
responce = node.query("SELECT * FROM system.disks")
|
||||
disks = responce.split("\n")
|
||||
response = node.query("SELECT * FROM system.disks")
|
||||
disks = response.split("\n")
|
||||
for disk in disks:
|
||||
if disk == '': # skip empty line (after split at last position)
|
||||
continue
|
||||
|
@ -26,7 +26,7 @@ def bootstrap():
|
||||
# just after server starts (+ 2 seconds, reload timeout).
|
||||
#
|
||||
# And on configuration reload the clusters will be re-created, so some
|
||||
# internal stuff will be reseted:
|
||||
# internal stuff will be reset:
|
||||
# - error_count
|
||||
# - last_used (round_robin)
|
||||
#
|
||||
|
@ -45,7 +45,7 @@ select 12 as p, geohashEncode(longitude, latitude, p) as actual, if(actual = enc
|
||||
|
||||
-- Here results are floats, and hence may not be compared for equality directly.
|
||||
-- We select all values that are off by some reasonable value:
|
||||
-- each byte of encoded string provides 5 bits of precison, (roughly 2.5 for lon and lat)
|
||||
-- each byte of encoded string provides 5 bits of precision, (roughly 2.5 for lon and lat)
|
||||
-- each bit of precision divides value range by 2.
|
||||
-- hence max error is roughly value range 2.5 times divided by 2 for each precision bit.
|
||||
-- initial value range is [-90..90] for latitude and [-180..180] for longitude.
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Regression for MemoryTracker that had been incorrectly accounted
|
||||
# (it was reseted before deallocation)
|
||||
# (it was reset before deallocation)
|
||||
#
|
||||
# For this will be used:
|
||||
# - two-level group by
|
||||
|
@ -6,11 +6,11 @@
|
||||
**Анализ схемы**
|
||||
|
||||
По запросу необходимо определить возможные таблицы. Имея строку запроса можно понять, какие его части обозначают названия таблиц, таким образом можно определить их количество в нашей базе данных.
|
||||
В парсере Clickhouse поддеревом запроса, отвечающее за таблицы из которых мы берем данные, является TABLES (Рисунок 1), в нем лежит основная таблица, из которой берутся колонки, а также операции JOIN, которые совершаются в запросе. Обходя все вершины в поддереве мы берем названия таблиц и баз данных в которых они лежат, а также их алиас, то есть укороченные названия, выбранные автором запроса. Эти названия могут понадобиться нам для определения принадлежности колонки в дальнейшем.
|
||||
В парсере Clickhouse поддеревом запроса, отвечающее за таблицы из которых мы берем данные, является TABLES (Рисунок 1), в нем лежит основная таблица, из которой берутся колонки, а также операции JOIN, которые совершаются в запросе. Обходя все вершины в поддереве мы берем названия таблиц и баз данных в которых они лежат, а также их алиас, то есть укороченные названия, выбранные автором запроса. Эти названия могут понадобиться нам для определения принадлежности колонки в дальнейшем.
|
||||
Таким образом для запроса мы получаем набор баз данных, а также таблиц и их условных обозначений (алиасов), по которым делается запрос.
|
||||
|
||||
Затем нам необходимо определить множество столбцов, которые присутствуют в запросе и таблицы, к которым они могут относиться. Во время исполнения запроса уже известно множество столбцов в каждой таблице, поэтому при исполнении программа автоматически связывает столбец и таблицу, однако в нашем случае нельзя однозначно трактовать принадлежность столбца к определенной таблице, например в следующем запросе: “SELECT column1, column2, column3 FROM table1 JOIN table2 on table1.column2 = table2.column3 ”. Здесь мы однозначно можем сказать, к какой таблице относятся колонки column2 и column3, однако column1 может принадлежать как первой, так и второй таблице. Для однозначности трактовки таких случаев, мы будем относить данные неопределенные колонки к основной таблице, по которой делается запрос, например в данном случае это будет таблица table1.
|
||||
Все столбцы в дереве лежат в вершинах типа INDENTIFIER, которые находятся в поддеревьях SELECT, TABLES, WHERE, GROUP_BY, HAVING, ORDER_BY. Рекурсивно обходя поддеревья мы формируем множество всех таблиц, затем мы разделяем колонку на составляющие: таблица (если она явно указана через точку) и само название, затем, так как таблица может являться алиасом, мы заменяем алиас на оригинальное название таблицы. Теперь у нас есть список всех столбцов и таблиц, к которым они относятся, для столбцов без таблиц определяем основную таблицу запроса.
|
||||
Все столбцы в дереве лежат в вершинах типа IDENTIFIER, которые находятся в поддеревьях SELECT, TABLES, WHERE, GROUP_BY, HAVING, ORDER_BY. Рекурсивно обходя поддеревья мы формируем множество всех таблиц, затем мы разделяем колонку на составляющие: таблица (если она явно указана через точку) и само название, затем, так как таблица может являться алиасом, мы заменяем алиас на оригинальное название таблицы. Теперь у нас есть список всех столбцов и таблиц, к которым они относятся, для столбцов без таблиц определяем основную таблицу запроса.
|
||||
|
||||
**Анализ столбцов**
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
Определить значения столбцов мы можем используя логический, арифметические и другие функции над значениями столбцов, которые указаны в запросе. Такие функции лежат в поддеревьях SELECT и WHERE. Параметром функции может быть константа, колонка либо другая функция (Рисунок 2). Таким образом для понимания типа колонки могут помочь следующие параметры: 1) Типы аргументов, которые может принимать функция, например функция TOSTARTOFMINUTE(округляет время до кратного 5 минутам вниз) может принимать только DATETIME, таким образом если аргументом данной функции является колонка, то данная колонка имеет тип DATETIME. 2) типы остальных аргументов в данной функции, например функция EQUALS(равенство), она подразумевает собой равенство типов ее аргументов, таким образом если в данной функции присутствует константа и столбец, то мы можем определить тип столбца как тип константы.
|
||||
|
||||
Таким образом, для каждой функции мы определяем возможные типы аргументов, тип возвращаемого значения, а также параметр, являются ли аргументы функции одинакового типа. Рекурсивный обработчик функций будет определять возможные типы столбцов использующихся в данных функциях по значениям аргументов и возвращать возможные типы результата выполнения функции.
|
||||
Теперь для каждого столбца мы имеем множество возможных типов его значений. Для однозначной трактовки запроса мы выберем один конкретный тип из этого множества.
|
||||
Теперь для каждого столбца мы имеем множество возможных типов его значений. Для однозначной трактовки запроса мы выберем один конкретный тип из этого множества.
|
||||
|
||||
**Определение значений столбцов**
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user