Merge branch 'master' into revive-cache-contention-fix

Kseniia Sumarokova 2024-03-13 16:52:00 +01:00 committed by GitHub
commit bc9dd7bb0b
94 changed files with 1825 additions and 729 deletions

View File

@ -108,16 +108,22 @@
{
[[noreturn]] void abortOnFailedAssertion(const String & description);
}
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_1(x, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_2(x, comment, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define chassert(x) (void)sizeof(!(x))
#define chassert_1(x, ...) (void)sizeof(!(x))
#define chassert_2(x, comment, ...) (void)sizeof(!(x))
#define UNREACHABLE() __builtin_unreachable()
#endif
#define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2)
#define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple
#define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1))
#endif
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
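For readers unfamiliar with the trick, here is a hedged, standalone sketch of how the new variadic `chassert` picks the one-argument or two-argument form by argument count; the `fprintf`/`abort` body is a placeholder for `DB::abortOnFailedAssertion`, not the real implementation, and like the original it relies on the C++20/GNU rule that a trailing `...` may receive no arguments.

```cpp
#include <cstdio>
#include <cstdlib>

// Hypothetical standalone reproduction (not the ClickHouse header) of the
// argument-count dispatch: chassert(cond) expands to chassert_1, while
// chassert(cond, "message") expands to chassert_2.
#define chassert_1(x, ...) do { (x) ? void(0) : (std::fprintf(stderr, "Assertion failed: %s\n", #x), std::abort()); } while (0)
#define chassert_2(x, comment, ...) do { (x) ? void(0) : (std::fprintf(stderr, "Assertion failed: %s\n", comment), std::abort()); } while (0)
#define CHASSERT_DISPATCH(_1, _2, N, ...) N(_1, _2)
#define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple
#define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1))

int main()
{
    int x = 1;
    chassert(x == 1);                              // one argument  -> chassert_1, message is the condition text
    chassert(x == 1, "x must stay equal to 1");    // two arguments -> chassert_2, message is the comment
    return 0;
}
```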

contrib/curl vendored (2 changed lines)

@ -1 +1 @@
Subproject commit 5ce164e0e9290c96eb7d502173426c0a135ec008
Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873

View File

@ -78,8 +78,8 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
#### Setup the Debian repository
``` bash
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg
curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list

View File

@ -10,6 +10,8 @@ sidebar_label: Nullable
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null).
See also operator [`IS NULL`](../operators/index.md#is_null).
``` sql
isNull(x)
```
@ -54,6 +56,8 @@ Result:
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
See also operator [`IS NOT NULL`](../operators/index.md#is_not_null).
``` sql
isNotNull(x)
```

View File

@ -5,80 +5,372 @@ sidebar_label: JSON
---
There are two sets of functions to parse JSON.
- `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
- `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
- `JSONExtract*` is made to parse normal JSON.
# visitParam functions
# simpleJSON/visitParam functions
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done.
The following assumptions are made:
1. The field name (function argument) must be a constant.
2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used.
4. The JSON does not have space characters outside of string literals.
## visitParamHas(params, name)
## simpleJSONHas
Checks whether there is a field with the `name` name.
Checks whether there is a field named `field_name`. The result is `UInt8`.
Alias: `simpleJSONHas`.
**Syntax**
## visitParamExtractUInt(params, name)
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.
Alias: `simpleJSONExtractUInt`.
## visitParamExtractInt(params, name)
The same as for Int64.
Alias: `simpleJSONExtractInt`.
## visitParamExtractFloat(params, name)
The same as for Float64.
Alias: `simpleJSONExtractFloat`.
## visitParamExtractBool(params, name)
Parses a true/false value. The result is UInt8.
Alias: `simpleJSONExtractBool`.
## visitParamExtractRaw(params, name)
Returns the value of a field, including separators.
Alias: `simpleJSONExtractRaw`.
Examples:
``` sql
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
```sql
simpleJSONHas(json, field_name)
```
## visitParamExtractString(params, name)
**Parameters**
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
Alias: `simpleJSONExtractString`.
**Returned value**
Examples:
It returns `1` if the field exists, `0` otherwise.
``` sql
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
visitParamExtractString('{"abc":"hello}', 'abc') = '';
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONHas(json, 'foo') FROM jsons;
SELECT simpleJSONHas(json, 'bar') FROM jsons;
```
```response
1
0
```
## simpleJSONExtractUInt
Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractUInt(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"4e3"}');
INSERT INTO jsons VALUES ('{"foo":3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
4
0
3
5
```
## simpleJSONExtractInt
Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractInt(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
-4
0
-3
5
```
## simpleJSONExtractFloat
Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractFloat(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
-4000
0
-3.4
5
```
## simpleJSONExtractBool
Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`.
**Syntax**
```sql
simpleJSONExtractBool(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` including (and not only) in the following cases:
- If the field doesn't exist.
- If the field contains `true` as a string, e.g.: `{"field":"true"}`.
- If the field contains `1` as a numerical value.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
1
0
0
```
## simpleJSONExtractRaw
Returns the value of the field named `field_name` as a `String`, including separators.
**Syntax**
```sql
simpleJSONExtractRaw(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;
```
```response
"-4e3"
-3.4
5
{"def":[1,2,3]}
```
## simpleJSONExtractString
Parses `String` in double quotes from the value of the field named `field_name`.
**Syntax**
```sql
simpleJSONExtractString(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String` if the field doesn't contain a double quoted string, if unescaping fails, or if the field doesn't exist.
**Implementation details**
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263a"}');
INSERT INTO jsons VALUES ('{"foo":"hello}');
SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;
```
```response
\n\0
```
## visitParamHas
This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas).
## visitParamExtractUInt
This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint).
## visitParamExtractInt
This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint).
## visitParamExtractFloat
This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat).
## visitParamExtractBool
This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool).
## visitParamExtractRaw
This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw).
## visitParamExtractString
This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring).
# JSONExtract functions
The following functions are based on [simdjson](https://github.com/lemire/simdjson) and are designed for more complex JSON parsing requirements.

View File

@ -299,6 +299,18 @@ sin(x)
Type: [Float*](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT sin(1.23);
```
```response
0.9424888019316975
```
## cos
Returns the cosine of the argument.

File diff suppressed because it is too large.

View File

@ -588,8 +588,41 @@ Result:
## substringUTF8
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Returns the substring of a string `s` which starts at the specified index `offset`, counted in Unicode code points. Counting starts from `1`. If `offset` is `0`, an empty string is returned. If `offset` is negative, the substring starts `offset` code points from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of code points the returned substring may have.
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Syntax**
```sql
substringUTF8(s, offset[, length])
```
**Arguments**
- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md)
- `offset`: The starting position of the substring in `s`. [(U)Int*](../../sql-reference/data-types/int-uint.md).
- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional.
**Returned value**
A substring of `s` of at most `length` code points, starting at index `offset`.
**Implementation details**
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Example**
```sql
SELECT 'Täglich grüßt das Murmeltier.' AS str,
substringUTF8(str, 9),
substringUTF8(str, 9, 5)
```
```response
Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt
```
## substringIndex
@ -624,7 +657,39 @@ Result:
## substringIndexUTF8
Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Returns the substring of `s` before `count` occurrences of the delimiter `delim`, specifically for Unicode code points.
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Syntax**
```sql
substringIndexUTF8(s, delim, count)
```
**Arguments**
- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md).
- `delim`: The character to split. [String](../../sql-reference/data-types/string.md).
- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Returned value**
A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`.
**Implementation details**
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Example**
```sql
SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2)
```
```response
www.straßen-in-europa
```
## appendTrailingCharIfAbsent

View File

@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The
ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
### IS NULL
### IS NULL {#is_null}
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns:
- `1`, if the value is `NULL`.
@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL
└──────────────┘
```
### IS NOT NULL
### IS NOT NULL {#is_not_null}
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns:
- `0`, if the value is `NULL`.

View File

@ -5,7 +5,7 @@ sidebar_label: cluster
title: "cluster, clusterAllReplicas"
---
Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried.
Allows to access all shards (configured in the `remote_servers` section) of a cluster without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. Only one replica of each shard is queried.
`clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection.

View File

@ -50,6 +50,7 @@
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
#include <Formats/FormatFactory.h>
namespace fs = std::filesystem;
using namespace std::literals;
@ -1137,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description,
}
static bool checkIfStdoutIsRegularFile()
{
struct stat file_stat;
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
void Client::processConfig()
{
if (!queries.empty() && config().has("queries-file"))
@ -1173,7 +1181,14 @@ void Client::processConfig()
pager = config().getString("pager", "");
is_default_format = !config().has("vertical") && !config().has("format");
if (config().has("vertical"))
if (is_default_format && checkIfStdoutIsRegularFile())
{
is_default_format = false;
std::optional<String> format_from_file_name;
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
format = format_from_file_name ? *format_from_file_name : "TabSeparated";
}
else if (config().has("vertical"))
format = config().getString("format", "Vertical");
else
format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated");

View File

@ -327,6 +327,14 @@ static bool checkIfStdinIsRegularFile()
return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
static bool checkIfStdoutIsRegularFile()
{
struct stat file_stat;
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty()))
@ -638,7 +646,14 @@ void LocalServer::processConfig()
if (config().has("macros"))
global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
if (!config().has("output-format") && !config().has("format") && checkIfStdoutIsRegularFile())
{
std::optional<String> format_from_file_name;
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
format = format_from_file_name ? *format_from_file_name : "TSV";
}
else
format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
insert_format = "Values";
/// Setting value from cmd arg overrides one from config

View File

@ -310,9 +310,11 @@ public:
{
for (Field & element : values)
{
UInt8 is_null = 0;
readBinary(is_null, buf);
if (!is_null)
/// We must initialize the Field type since some internal functions (like operator=) use them
new (&element) Field;
bool has_value = false;
readBinary(has_value, buf);
if (has_value)
serialization->deserializeBinary(element, buf, {});
}
}
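The reason for the placement `new` above, shown here with a simplified stand-in type rather than `DB::Field` itself: assigning to an object that was never constructed runs `operator=` on uninitialized memory, which for non-trivial types reads garbage state. This is only a sketch of the general rule, not the ClickHouse code path.

```cpp
#include <new>
#include <string>

// Simplified stand-in for DB::Field: a type whose operator= must inspect/free the
// previous value, so assignment is only safe after the object has been constructed.
struct Value
{
    std::string payload;
};

int main()
{
    alignas(Value) unsigned char raw[sizeof(Value)];   // uninitialized storage
    Value * value = new (raw) Value{};                  // placement new: establish a valid object first
    *value = Value{"deserialized"};                     // now assignment is well-defined
    value->~Value();                                    // manual construction pairs with manual destruction
    return 0;
}
```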

View File

@ -14,6 +14,8 @@
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <boost/algorithm/string.hpp>
namespace DB
{

View File

@ -20,12 +20,12 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
extern const int DUPLICATE_COLUMN;
extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int EXPERIMENTAL_FEATURE_ERROR;
}
namespace
@ -247,7 +247,7 @@ void ColumnObject::Subcolumn::checkTypes() const
prefix_types.push_back(current_type);
auto prefix_common_type = getLeastSupertype(prefix_types);
if (!prefix_common_type->equals(*current_type))
throw Exception(ErrorCodes::LOGICAL_ERROR,
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Data type {} of column at position {} cannot represent all columns from i-th prefix",
current_type->getName(), i);
}
@ -635,7 +635,7 @@ void ColumnObject::checkConsistency() const
{
if (num_rows != leaf->data.size())
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject."
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject."
" Subcolumn '{}' has {} rows, but expected size is {}",
leaf->path.getPath(), leaf->data.size(), num_rows);
}
@ -919,7 +919,7 @@ void ColumnObject::addSubcolumn(const PathInData & key, size_t new_size)
void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size)
{
if (!key.hasNested())
throw Exception(ErrorCodes::LOGICAL_ERROR,
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Cannot add Nested subcolumn, because path doesn't contain Nested");
bool inserted = false;

View File

@ -598,6 +598,7 @@
M(714, UNEXPECTED_CLUSTER) \
M(715, CANNOT_DETECT_FORMAT) \
M(716, CANNOT_FORGET_PARTITION) \
M(717, EXPERIMENTAL_FEATURE_ERROR) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -1,5 +1,7 @@
#include <Common/FunctionDocumentation.h>
#include <boost/algorithm/string.hpp>
namespace DB
{
@ -31,14 +33,7 @@ std::string FunctionDocumentation::examplesAsString() const
std::string FunctionDocumentation::categoriesAsString() const
{
if (categories.empty())
return "";
auto it = categories.begin();
std::string res = *it;
for (; it != categories.end(); ++it)
res += ", " + *it;
return res;
return boost::algorithm::join(categories, ", ");
}
}
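A standalone illustration of why the loop was replaced (assuming the categories are modelled as `std::set<std::string>`, which is an approximation of `FunctionDocumentation`'s actual container): the removed code seeded `res` with `*it` and then iterated from `begin()` again, so the first category was emitted twice.

```cpp
#include <boost/algorithm/string/join.hpp>
#include <iostream>
#include <set>
#include <string>

int main()
{
    std::set<std::string> categories{"JSON", "String"};

    // Old behaviour: first element duplicated ("JSON, JSON, String").
    auto it = categories.begin();
    std::string old_result = *it;
    for (; it != categories.end(); ++it)
        old_result += ", " + *it;

    // New behaviour: "JSON, String".
    const std::string new_result = boost::algorithm::join(categories, ", ");

    std::cout << "old: " << old_result << '\n';
    std::cout << "new: " << new_result << '\n';
}
```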

View File

@ -374,7 +374,7 @@ void Context::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::Abstr
if (!shared->keeper_dispatcher)
return;
shared->keeper_dispatcher->updateConfiguration(getConfigRef(), getMacros());
shared->keeper_dispatcher->updateConfiguration(config_, getMacros());
}
std::shared_ptr<zkutil::ZooKeeper> Context::getZooKeeper() const

View File

@ -28,9 +28,9 @@ namespace DB
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int LOGICAL_ERROR;
extern const int INCOMPATIBLE_COLUMNS;
extern const int NOT_IMPLEMENTED;
extern const int EXPERIMENTAL_FEATURE_ERROR;
}
size_t getNumberOfDimensions(const IDataType & type)
@ -92,7 +92,7 @@ ColumnPtr createArrayOfColumn(ColumnPtr column, size_t num_dimensions)
Array createEmptyArrayField(size_t num_dimensions)
{
if (num_dimensions == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create array field with 0 dimensions");
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot create array field with 0 dimensions");
Array array;
Array * current_array = &array;
@ -231,7 +231,7 @@ static std::pair<ColumnPtr, DataTypePtr> recursivlyConvertDynamicColumnToTuple(
};
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type->getName());
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type->getName());
}
void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot)
@ -247,7 +247,7 @@ void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & sto
GetColumnsOptions options(GetColumnsOptions::AllPhysical);
auto storage_column = storage_snapshot->tryGetColumn(options, column.name);
if (!storage_column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name);
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Column '{}' not found in storage", column.name);
auto storage_column_concrete = storage_snapshot->getColumn(options.withExtendedObjects(), column.name);
@ -315,7 +315,7 @@ static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool che
{
const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
if (!type_tuple)
throw Exception(ErrorCodes::LOGICAL_ERROR,
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Least common type for object can be deduced only from tuples, but {} given", type->getName());
auto [tuple_paths, tuple_types] = flattenTuple(type);
@ -427,7 +427,7 @@ static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_in_storage.get()))
return getLeastCommonTypeForTuple(*type_tuple, concrete_types, check_ambiguos_paths);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
}
DataTypePtr getLeastCommonTypeForDynamicColumns(
@ -481,7 +481,7 @@ DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage
return recreateTupleWithElements(*type_tuple, new_elements);
}
throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
}
bool hasDynamicSubcolumns(const ColumnsDescription & columns)
@ -613,7 +613,7 @@ DataTypePtr reduceNumberOfDimensions(DataTypePtr type, size_t dimensions_to_redu
{
const auto * type_array = typeid_cast<const DataTypeArray *>(type.get());
if (!type_array)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce");
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Not enough dimensions to reduce");
type = type_array->getNestedType();
}
@ -627,7 +627,7 @@ ColumnPtr reduceNumberOfDimensions(ColumnPtr column, size_t dimensions_to_reduce
{
const auto * column_array = typeid_cast<const ColumnArray *>(column.get());
if (!column_array)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce");
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Not enough dimensions to reduce");
column = column_array->getDataPtr();
}
@ -653,7 +653,7 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
auto collect_tuple_elemets = [](const auto & children)
{
if (children.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create type from empty Tuple or Nested node");
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot create type from empty Tuple or Nested node");
std::vector<std::tuple<String, ColumnWithTypeAndDimensions>> tuple_elements;
tuple_elements.reserve(children.size());
@ -705,6 +705,7 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
size_t num_elements = tuple_columns.size();
Columns tuple_elements_columns(num_elements);
DataTypes tuple_elements_types(num_elements);
size_t last_offset = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets_columns.back()).getData().back();
/// Reduce extra array dimensions to get columns and types of Nested elements.
for (size_t i = 0; i < num_elements; ++i)
@ -712,6 +713,14 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
assert(tuple_columns[i].array_dimensions == tuple_columns[0].array_dimensions);
tuple_elements_columns[i] = reduceNumberOfDimensions(tuple_columns[i].column, tuple_columns[i].array_dimensions);
tuple_elements_types[i] = reduceNumberOfDimensions(tuple_columns[i].type, tuple_columns[i].array_dimensions);
if (tuple_elements_columns[i]->size() != last_offset)
throw Exception(
ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Cannot create a type for subcolumn {} in Object data type: offsets_column has data inconsistent with nested_column. "
"Data size: {}, last offset: {}",
node.path.getPath(),
tuple_elements_columns[i]->size(),
last_offset);
}
auto result_column = ColumnArray::create(ColumnTuple::create(tuple_elements_columns), offsets_columns.back());
@ -720,6 +729,16 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
/// Recreate result Array type and Array column.
for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it)
{
last_offset = assert_cast<const ColumnArray::ColumnOffsets &>((**it)).getData().back();
if (result_column->size() != last_offset)
throw Exception(
ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Cannot create a type for subcolumn {} in Object data type: offsets_column has data inconsistent with nested_column. "
"Data size: {}, last offset: {}",
node.path.getPath(),
result_column->size(),
last_offset);
result_column = ColumnArray::create(result_column, *it);
result_type = std::make_shared<DataTypeArray>(result_type);
}
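For context, the invariant these new checks enforce, sketched with plain vectors standing in for ClickHouse's columns: an array column stores a flattened nested column plus a column of cumulative end offsets, so the final offset must equal the flattened column's size, otherwise the array boundaries cannot be reconstructed.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Simplified stand-ins (not ClickHouse's ColumnArray) for the consistency check above.
int main()
{
    std::vector<int>         nested  = {1, 2, 3, 10, 20};   // flattened elements of all rows
    std::vector<std::size_t> offsets = {3, 5};              // row 0 = [1,2,3], row 1 = [10,20]

    const std::size_t last_offset = offsets.back();
    assert(nested.size() == last_offset && "offsets column is inconsistent with nested column");
    return 0;
}
```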
@ -822,7 +841,7 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
assert(paths.size() == tuple_columns.size());
if (paths.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unflatten empty Tuple");
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot unflatten empty Tuple");
/// We add all paths to the subcolumn tree and then create a type from it.
/// The tree stores column, type and number of array dimensions
@ -841,7 +860,7 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
tree.add(paths[i], [&](Node::Kind kind, bool exists) -> std::shared_ptr<Node>
{
if (pos >= num_parts)
throw Exception(ErrorCodes::LOGICAL_ERROR,
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Not enough name parts for path {}. Expected at least {}, got {}",
paths[i].getPath(), pos + 1, num_parts);

View File

@ -29,7 +29,7 @@ namespace ErrorCodes
extern const int INCORRECT_DATA;
extern const int CANNOT_READ_ALL_DATA;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
extern const int EXPERIMENTAL_FEATURE_ERROR;
}
template <typename Parser>
@ -177,7 +177,7 @@ void SerializationObject<Parser>::serializeBinaryBulkStatePrefix(
auto * stream = settings.getter(settings.path);
if (!stream)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for kind of binary serialization");
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Missing stream for kind of binary serialization");
auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object);
@ -288,7 +288,7 @@ void SerializationObject<Parser>::serializeBinaryBulkWithMultipleStreams(
if (!state_object->nested_type->equals(*tuple_type))
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Types of internal column of Object mismatched. Expected: {}, Got: {}",
state_object->nested_type->getName(), tuple_type->getName());
}

View File

@ -149,7 +149,9 @@ ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr
*/
ASTPtr DatabasesOverlay::getCreateDatabaseQuery() const
{
return std::make_shared<ASTCreateQuery>();
auto query = std::make_shared<ASTCreateQuery>();
query->setDatabase(getDatabaseName());
return query;
}
String DatabasesOverlay::getTableDataPath(const String & table_name) const

View File

@ -1,7 +1,27 @@
#include "DiskType.h"
#include <Poco/String.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
MetadataStorageType metadataTypeFromString(const String & type)
{
auto check_type = Poco::toLower(type);
if (check_type == "local")
return MetadataStorageType::Local;
if (check_type == "plain")
return MetadataStorageType::Plain;
if (check_type == "web")
return MetadataStorageType::StaticWeb;
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,
"MetadataStorageFactory: unknown metadata storage type: {}", type);
}
bool DataSourceDescription::operator==(const DataSourceDescription & other) const
{
@ -14,4 +34,32 @@ bool DataSourceDescription::sameKind(const DataSourceDescription & other) const
== std::tie(other.type, other.object_storage_type, other.description);
}
std::string DataSourceDescription::toString() const
{
switch (type)
{
case DataSourceType::Local:
return "local";
case DataSourceType::RAM:
return "memory";
case DataSourceType::ObjectStorage:
{
switch (object_storage_type)
{
case ObjectStorageType::S3:
return "s3";
case ObjectStorageType::HDFS:
return "hdfs";
case ObjectStorageType::Azure:
return "azure_blob_storage";
case ObjectStorageType::Local:
return "local_blob_storage";
case ObjectStorageType::Web:
return "web";
case ObjectStorageType::None:
return "none";
}
}
}
}
}

View File

@ -17,7 +17,6 @@ enum class ObjectStorageType
{
None,
S3,
S3_Plain,
Azure,
HDFS,
Web,
@ -30,9 +29,11 @@ enum class MetadataStorageType
Local,
Plain,
StaticWeb,
Memory,
};
MetadataStorageType metadataTypeFromString(const String & type);
String toString(DataSourceType data_source_type);
struct DataSourceDescription
{
DataSourceType type;
@ -47,36 +48,7 @@ struct DataSourceDescription
bool operator==(const DataSourceDescription & other) const;
bool sameKind(const DataSourceDescription & other) const;
std::string toString() const
{
switch (type)
{
case DataSourceType::Local:
return "local";
case DataSourceType::RAM:
return "memory";
case DataSourceType::ObjectStorage:
{
switch (object_storage_type)
{
case ObjectStorageType::S3:
return "s3";
case ObjectStorageType::S3_Plain:
return "s3_plain";
case ObjectStorageType::HDFS:
return "hdfs";
case ObjectStorageType::Azure:
return "azure_blob_storage";
case ObjectStorageType::Local:
return "local_blob_storage";
case ObjectStorageType::Web:
return "web";
case ObjectStorageType::None:
return "none";
}
}
}
}
std::string toString() const;
};
}

View File

@ -218,6 +218,7 @@ public:
virtual bool isReadOnly() const { return false; }
virtual bool isWriteOnce() const { return false; }
virtual bool isPlain() const { return false; }
virtual bool supportParallelWrite() const { return false; }

View File

@ -31,6 +31,8 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_)
description = *block_device_id;
else
description = "/";
fs::create_directories(key_prefix);
}
bool LocalObjectStorage::exists(const StoredObject & object) const
@ -106,13 +108,11 @@ std::unique_ptr<ReadBufferFromFileBase> LocalObjectStorage::readObject( /// NOLI
std::optional<size_t> read_hint,
std::optional<size_t> file_size) const
{
const auto & path = object.remote_path;
if (!file_size)
file_size = tryGetSizeFromFilePath(path);
file_size = tryGetSizeFromFilePath(object.remote_path);
LOG_TEST(log, "Read object: {}", path);
return createReadBufferFromFileBase(path, patchSettings(read_settings), read_hint, file_size);
LOG_TEST(log, "Read object: {}", object.remote_path);
return createReadBufferFromFileBase(object.remote_path, patchSettings(read_settings), read_hint, file_size);
}
std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NOLINT
@ -126,6 +126,11 @@ std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NO
throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files");
LOG_TEST(log, "Write object: {}", object.remote_path);
/// Unlike real blob storage, in local fs we cannot create a file with non-existing prefix.
/// So let's create it.
fs::create_directories(fs::path(object.remote_path).parent_path());
return std::make_unique<WriteBufferFromFile>(object.remote_path, buf_size);
}
@ -157,9 +162,36 @@ void LocalObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
removeObjectIfExists(object);
}
ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & /* path */) const
ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for LocalObjectStorage");
ObjectMetadata object_metadata;
LOG_TEST(log, "Getting metadata for path: {}", path);
object_metadata.size_bytes = fs::file_size(path);
object_metadata.last_modified = Poco::Timestamp::fromEpochTime(
std::chrono::duration_cast<std::chrono::seconds>(fs::last_write_time(path).time_since_epoch()).count());
return object_metadata;
}
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const
{
for (const auto & entry : fs::directory_iterator(path))
{
if (entry.is_directory())
{
listObjects(entry.path(), children, 0);
continue;
}
auto metadata = getObjectMetadata(entry.path());
children.emplace_back(entry.path(), std::move(metadata));
}
}
bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const
{
/// Unlike real object storage, existence of a prefix path can be checked by
/// just checking existence of this prefix directly, so simple exists is enough here.
return exists(StoredObject(path));
}
void LocalObjectStorage::copyObject( // NOLINT

View File

@ -58,6 +58,10 @@ public:
ObjectMetadata getObjectMetadata(const std::string & path) const override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
bool existsOrHasAnyChild(const std::string & path) const override;
void copyObject( /// NOLINT
const StoredObject & object_from,
const StoredObject & object_to,

View File

@ -32,6 +32,35 @@ void MetadataStorageFactory::registerMetadataStorageType(const std::string & met
}
}
std::string MetadataStorageFactory::getCompatibilityMetadataTypeHint(const ObjectStorageType & type)
{
switch (type)
{
case ObjectStorageType::S3:
case ObjectStorageType::HDFS:
case ObjectStorageType::Local:
case ObjectStorageType::Azure:
return "local";
case ObjectStorageType::Web:
return "web";
default:
return "";
}
}
std::string MetadataStorageFactory::getMetadataType(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const std::string & compatibility_type_hint)
{
if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config");
}
return config.getString(config_prefix + ".metadata_type", compatibility_type_hint);
}
MetadataStoragePtr MetadataStorageFactory::create(
const std::string & name,
const Poco::Util::AbstractConfiguration & config,
@ -39,12 +68,7 @@ MetadataStoragePtr MetadataStorageFactory::create(
ObjectStoragePtr object_storage,
const std::string & compatibility_type_hint) const
{
if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type"))
{
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config");
}
const auto type = config.getString(config_prefix + ".metadata_type", compatibility_type_hint);
const auto type = getMetadataType(config, config_prefix, compatibility_type_hint);
const auto it = registry.find(type);
if (it == registry.end())

View File

@ -25,6 +25,13 @@ public:
ObjectStoragePtr object_storage,
const std::string & compatibility_type_hint) const;
static std::string getMetadataType(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const std::string & compatibility_type_hint = "");
static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type);
private:
using Registry = std::unordered_map<String, Creator>;
Registry registry;

View File

@ -48,10 +48,7 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path
std::string directory = object_key.serialize();
if (!directory.ends_with('/'))
directory += '/';
RelativePathsWithMetadata files;
object_storage->listObjects(directory, files, 1);
return !files.empty();
return object_storage->existsOrHasAnyChild(directory);
}
uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const

View File

@ -18,6 +18,8 @@
#include <Disks/ObjectStorages/Local/LocalObjectStorage.h>
#include <Disks/loadLocalDiskConfig.h>
#endif
#include <Disks/ObjectStorages/MetadataStorageFactory.h>
#include <Disks/ObjectStorages/PlainObjectStorage.h>
#include <Interpreters/Context.h>
#include <Common/Macros.h>
@ -32,6 +34,36 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
namespace
{
bool isPlainStorage(
ObjectStorageType type,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix)
{
auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(type);
auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint);
return metadataTypeFromString(metadata_type) == MetadataStorageType::Plain;
}
template <typename BaseObjectStorage, class ...Args>
ObjectStoragePtr createObjectStorage(
ObjectStorageType type,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Args && ...args)
{
if (isPlainStorage(type, config, config_prefix))
{
return std::make_shared<PlainObjectStorage<BaseObjectStorage>>(std::forward<Args>(args)...);
}
else
{
return std::make_shared<BaseObjectStorage>(std::forward<Args>(args)...);
}
}
}
ObjectStorageFactory & ObjectStorageFactory::instance()
{
static ObjectStorageFactory factory;
@ -127,14 +159,14 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory)
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = std::make_shared<S3ObjectStorage>(
std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
auto object_storage = createObjectStorage<S3ObjectStorage>(
ObjectStorageType::S3, config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
/// NOTE: should we still perform this check for clickhouse-disks?
if (!skip_access_check)
checkS3Capabilities(*object_storage, s3_capabilities, name);
checkS3Capabilities(*dynamic_cast<S3ObjectStorage *>(object_storage.get()), s3_capabilities, name);
return object_storage;
});
@ -163,14 +195,14 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory)
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = std::make_shared<S3PlainObjectStorage>(
auto object_storage = std::make_shared<PlainObjectStorage<S3ObjectStorage>>(
std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name);
/// NOTE: should we still perform this check for clickhouse-disks?
if (!skip_access_check)
checkS3Capabilities(*object_storage, s3_capabilities, name);
checkS3Capabilities(*dynamic_cast<S3ObjectStorage *>(object_storage.get()), s3_capabilities, name);
return object_storage;
});
@ -198,7 +230,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory)
context->getSettingsRef().hdfs_replication
);
return std::make_unique<HDFSObjectStorage>(uri, std::move(settings), config);
return createObjectStorage<HDFSObjectStorage>(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config);
});
}
#endif
@ -214,12 +246,11 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory)
bool /* skip_access_check */) -> ObjectStoragePtr
{
AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix);
return std::make_unique<AzureObjectStorage>(
name,
return createObjectStorage<AzureObjectStorage>(
ObjectStorageType::Azure, config, config_prefix, name,
getAzureBlobContainerClient(config, config_prefix),
getAzureBlobStorageSettings(config, config_prefix, context),
endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix);
};
factory.registerObjectStorageType("azure_blob_storage", creator);
factory.registerObjectStorageType("azure", creator);
@ -250,7 +281,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory)
ErrorCodes::BAD_ARGUMENTS, "Bad URI: `{}`. Error: {}", uri, e.what());
}
return std::make_shared<WebObjectStorage>(uri, context);
return createObjectStorage<WebObjectStorage>(ObjectStorageType::Web, config, config_prefix, uri, context);
});
}
@ -268,7 +299,7 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory)
loadDiskLocalConfig(name, config, config_prefix, context, object_key_prefix, keep_free_space_bytes);
/// keys are mapped to the fs, object_key_prefix is a directory also
fs::create_directories(object_key_prefix);
return std::make_shared<LocalObjectStorage>(object_key_prefix);
return createObjectStorage<LocalObjectStorage>(ObjectStorageType::Local, config, config_prefix, object_key_prefix);
};
factory.registerObjectStorageType("local_blob_storage", creator);

View File

@ -0,0 +1,35 @@
#pragma once
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Common/ObjectStorageKeyGenerator.h>
namespace DB
{
/// Do not encode keys, store as-is, and do not require separate disk for metadata.
/// But because of this does not support renames/hardlinks/attrs/...
///
/// NOTE: This disk has excessive API calls.
template <typename BaseObjectStorage>
class PlainObjectStorage : public BaseObjectStorage
{
public:
template <class ...Args>
explicit PlainObjectStorage(Args && ...args)
: BaseObjectStorage(std::forward<Args>(args)...) {}
std::string getName() const override { return "" + BaseObjectStorage::getName(); }
/// Notes:
/// - supports BACKUP to this disk
/// - does not support INSERT into MergeTree table on this disk
bool isWriteOnce() const override { return true; }
bool isPlain() const override { return true; }
ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override
{
return ObjectStorageKey::createAsRelative(BaseObjectStorage::getCommonKeyPrefix(), path);
}
};
}
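A hedged sketch of the wrapping pattern this template introduces, using hypothetical stand-in types (not the real `IObjectStorage` hierarchy): instead of a hand-written `S3PlainObjectStorage`, the "plain" flavour of any backend is produced by deriving a template from the concrete storage and overriding only the plain-specific behaviour.

```cpp
#include <iostream>
#include <memory>
#include <string>

// Hypothetical backend standing in for S3ObjectStorage, AzureObjectStorage, etc.
struct FakeS3Storage
{
    explicit FakeS3Storage(std::string bucket_) : bucket(std::move(bucket_)) {}
    virtual ~FakeS3Storage() = default;
    virtual std::string getName() const { return "S3(" + bucket + ")"; }
    virtual bool isPlain() const { return false; }
    std::string bucket;
};

template <typename Base>
struct Plain : Base
{
    using Base::Base;                               // reuse the backend's constructors
    bool isPlain() const override { return true; }  // only the plain-specific behaviour differs
};

int main()
{
    std::unique_ptr<FakeS3Storage> storage = std::make_unique<Plain<FakeS3Storage>>("my-bucket");
    std::cout << storage->getName() << " plain=" << storage->isPlain() << '\n';
}
```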

View File

@ -10,25 +10,6 @@ namespace DB
void registerObjectStorages();
void registerMetadataStorages();
static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type)
{
switch (type)
{
case ObjectStorageType::S3:
case ObjectStorageType::HDFS:
case ObjectStorageType::Local:
case ObjectStorageType::Azure:
return "local";
case ObjectStorageType::S3_Plain:
return "plain";
case ObjectStorageType::Web:
return "web";
case ObjectStorageType::None:
return "";
}
UNREACHABLE();
}
void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_check)
{
registerObjectStorages();
@ -47,7 +28,10 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch
std::string compatibility_metadata_type_hint;
if (!config.has(config_prefix + ".metadata_type"))
{
compatibility_metadata_type_hint = getCompatibilityMetadataTypeHint(object_storage->getType());
if (object_storage->isPlain())
compatibility_metadata_type_hint = "plain";
else
compatibility_metadata_type_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(object_storage->getType());
}
auto metadata_storage = MetadataStorageFactory::instance().create(

View File

@ -15,16 +15,10 @@ namespace ErrorCodes
}
ObjectStorageKeysGeneratorPtr getKeyGenerator(
String type,
const S3::URI & uri,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix)
{
if (type == "s3_plain")
return createObjectStorageKeysGeneratorAsIsWithPrefix(uri.key);
chassert(type == "s3");
bool storage_metadata_write_full_object_key = DiskObjectStorageMetadata::getWriteFullObjectKeySetting();
bool send_metadata = config.getBool(config_prefix + ".send_metadata", false);

View File

@ -12,7 +12,6 @@ namespace DB
namespace S3 { struct URI; }
ObjectStorageKeysGeneratorPtr getKeyGenerator(
String type,
const S3::URI & uri,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix);

View File

@ -48,6 +48,7 @@ namespace ErrorCodes
{
extern const int S3_ERROR;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
namespace
@ -562,6 +563,8 @@ std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const
{
if (!key_generator)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set");
return key_generator->generate(path);
}

View File

@ -182,27 +182,6 @@ private:
LoggerPtr log;
};
/// Do not encode keys, store as-is, and do not require separate disk for metadata.
/// But because of this does not support renames/hardlinks/attrs/...
///
/// NOTE: This disk has excessive API calls.
class S3PlainObjectStorage : public S3ObjectStorage
{
public:
std::string getName() const override { return "S3PlainObjectStorage"; }
template <class ...Args>
explicit S3PlainObjectStorage(Args && ...args)
: S3ObjectStorage("S3PlainObjectStorage", std::forward<Args>(args)...) {}
ObjectStorageType getType() const override { return ObjectStorageType::S3_Plain; }
/// Notes:
/// - supports BACKUP to this disk
/// - does not support INSERT into MergeTree table on this disk
bool isWriteOnce() const override { return true; }
};
}
#endif

View File

@ -86,6 +86,10 @@ WebObjectStorage::loadFiles(const String & path, const std::unique_lock<std::sha
loaded_files.emplace_back(file_path);
}
/// Check for not found url after read attempt, because of delayed initialization.
if (metadata_buf->hasNotFoundURL())
return {};
auto [it, inserted] = files.add(path, FileData::createDirectoryInfo(true));
if (!inserted)
{

View File

@ -1,10 +1,11 @@
#include <Formats/ReadSchemaUtils.h>
#include <Interpreters/Context.h>
#include <Processors/Formats/ISchemaReader.h>
#include <Common/assert_cast.h>
#include <IO/WithFileSize.h>
#include <IO/EmptyReadBuffer.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/WithFileSize.h>
#include <Interpreters/Context.h>
#include <Processors/Formats/ISchemaReader.h>
#include <Storages/IStorage.h>
#include <Common/assert_cast.h>
namespace DB
{

View File

@ -1007,8 +1007,13 @@ private:
if (!(*null_map)[row])
continue;
}
else if (!applyVisitor(FieldVisitorAccurateEquals(), arr[i], value))
continue;
else
{
if (null_map && (*null_map)[row])
continue;
if (!applyVisitor(FieldVisitorAccurateEquals(), arr[i], value))
continue;
}
ConcreteAction::apply(data[row], i);

View File

@ -4,6 +4,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnConst.h>
#include <Functions/FunctionFactory.h>
#include <Functions/IFunction.h>

View File

@ -13,7 +13,15 @@ using FunctionSin = FunctionMathUnary<UnaryFunctionVectorized<SinName, sin>>;
REGISTER_FUNCTION(Sin)
{
factory.registerFunction<FunctionSin>({}, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionSin>(
FunctionDocumentation{
.description = "Returns the sine of the argument.",
.syntax = "sin(x)",
.arguments = {{"x", "The number whose sine will be returned. (U)Int*, Float* or Decimal*."}},
.returned_value = "The sine of x.",
.examples = {{.name = "simple", .query = "SELECT sin(1.23)", .result = "0.9424888019316975"}},
.categories{"Mathematical", "Trigonometric"}},
FunctionFactory::CaseInsensitive);
}
}

View File

@ -62,32 +62,17 @@ public:
{
}
/// Get the name of the function.
String getName() const override
{
return name;
}
/// Do not sleep during query analysis.
bool isSuitableForConstantFolding() const override
{
return false;
}
size_t getNumberOfArguments() const override
{
return 1;
}
String getName() const override { return name; }
bool isSuitableForConstantFolding() const override { return false; } /// Do not sleep during query analysis.
size_t getNumberOfArguments() const override { return 1; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
WhichDataType which(arguments[0]);
if (!which.isFloat()
&& !which.isNativeUInt())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected Float64",
if (!which.isFloat() && !which.isNativeUInt())
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected UInt* or Float*",
arguments[0]->getName(), getName());
return std::make_shared<DataTypeUInt8>();

View File

@ -21,7 +21,35 @@ using FunctionSimpleJSONExtractBool = FunctionsStringSearch<ExtractParamImpl<Nam
REGISTER_FUNCTION(VisitParamExtractBool)
{
factory.registerFunction<FunctionSimpleJSONExtractBool>();
factory.registerFunction<FunctionSimpleJSONExtractBool>(FunctionDocumentation{
.description = "Parses a true/false value from the value of the field named field_name. The result is UInt8.",
.syntax = "simpleJSONExtractBool(json, field_name)",
.arguments
= {{"json", "The JSON in which the field is searched for. String."},
{"field_name", "The name of the field to search for. String literal."}},
.returned_value
= R"(It returns 1 if the value of the field is true, 0 otherwise. This means this function will return 0 including (and not only) in the following cases:
- If the field doesn't exist.
- If the field contains true as a string, e.g.: {"field":"true"}.
- If the field contains 1 as a numerical value.)",
.examples
= {{.name = "simple",
.query = R"(CREATE TABLE jsons
(
json String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;)",
.result = R"(0
1
0
0)"}},
.categories{"JSON"}});
factory.registerAlias("visitParamExtractBool", "simpleJSONExtractBool");
}

View File

@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractFloat = FunctionsStringSearch<ExtractParamImpl<Na
REGISTER_FUNCTION(VisitParamExtractFloat)
{
factory.registerFunction<FunctionSimpleJSONExtractFloat>();
factory.registerFunction<FunctionSimpleJSONExtractFloat>(FunctionDocumentation{
.description
= "Parses Float64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the "
"beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.",
.syntax = "simpleJSONExtractFloat(json, field_name)",
.arguments
= {{"json", "The JSON in which the field is searched for. String."},
{"field_name", "The name of the field to search for. String literal."}},
.returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.",
.examples
= {{.name = "simple",
.query = R"(CREATE TABLE jsons
(
json String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;)",
.result = R"(0
-4000
0
-3.4
5)"}},
.categories{"JSON"}});
factory.registerAlias("visitParamExtractFloat", "simpleJSONExtractFloat");
}

View File

@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractInt = FunctionsStringSearch<ExtractParamImpl<Name
REGISTER_FUNCTION(VisitParamExtractInt)
{
factory.registerFunction<FunctionSimpleJSONExtractInt>();
factory.registerFunction<FunctionSimpleJSONExtractInt>(FunctionDocumentation{
.description
= "Parses Int64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the "
"beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.",
.syntax = "simpleJSONExtractInt(json, field_name)",
.arguments
= {{"json", "The JSON in which the field is searched for. String."},
{"field_name", "The name of the field to search for. String literal."}},
.returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.",
.examples
= {{.name = "simple",
.query = R"(CREATE TABLE jsons
(
json String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;)",
.result = R"(0
-4
0
-3
5)"}},
.categories{"JSON"}});
factory.registerAlias("visitParamExtractInt", "simpleJSONExtractInt");
}

View File

@ -61,7 +61,35 @@ using FunctionSimpleJSONExtractRaw = FunctionsStringSearchToString<ExtractParamT
REGISTER_FUNCTION(VisitParamExtractRaw)
{
factory.registerFunction<FunctionSimpleJSONExtractRaw>();
factory.registerFunction<FunctionSimpleJSONExtractRaw>(FunctionDocumentation{
.description = "Returns the value of the field named field_name as a String, including separators.",
.syntax = "simpleJSONExtractRaw(json, field_name)",
.arguments
= {{"json", "The JSON in which the field is searched for. String."},
{"field_name", "The name of the field to search for. String literal."}},
.returned_value
= "It returns the value of the field as a String including separators if the field exists, or an empty String otherwise.",
.examples
= {{.name = "simple",
.query = R"(CREATE TABLE jsons
(
json String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;)",
.result = R"(
"-4e3"
-3.4
5
{"def":[1,2,3]})"}},
.categories{"JSON"}});
factory.registerAlias("visitParamExtractRaw", "simpleJSONExtractRaw");
}

View File

@ -22,7 +22,35 @@ using FunctionSimpleJSONExtractString = FunctionsStringSearchToString<ExtractPar
REGISTER_FUNCTION(VisitParamExtractString)
{
factory.registerFunction<FunctionSimpleJSONExtractString>();
factory.registerFunction<FunctionSimpleJSONExtractString>(FunctionDocumentation{
.description = R"(Parses String in double quotes from the value of the field named field_name.
There is currently no support for code points in the format \uXXXX\uYYYY that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).)",
.syntax = "simpleJSONExtractString(json, field_name)",
.arguments
= {{"json", "The JSON in which the field is searched for. String."},
{"field_name", "The name of the field to search for. String literal."}},
.returned_value = "It returns the value of a field as a String, including separators. The value is unescaped. It returns an empty "
"String: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist.",
.examples
= {{.name = "simple",
.query = R"(CREATE TABLE jsons
(
json String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263a"}');
INSERT INTO jsons VALUES ('{"foo":"hello}');
SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;)",
.result = R"(\n\0
)"}},
.categories{"JSON"}});
factory.registerAlias("visitParamExtractString", "simpleJSONExtractString");
}

View File

@ -12,7 +12,36 @@ using FunctionSimpleJSONExtractUInt = FunctionsStringSearch<ExtractParamImpl<Nam
REGISTER_FUNCTION(VisitParamExtractUInt)
{
factory.registerFunction<FunctionSimpleJSONExtractUInt>();
factory.registerFunction<FunctionSimpleJSONExtractUInt>(FunctionDocumentation{
.description
= "Parses UInt64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the "
"beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.",
.syntax = "simpleJSONExtractUInt(json, field_name)",
.arguments
= {{"json", "The JSON in which the field is searched for. String."},
{"field_name", "The name of the field to search for. String literal."}},
.returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.",
.examples
= {{.name = "simple",
.query = R"(CREATE TABLE jsons
(
json String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"4e3"}');
INSERT INTO jsons VALUES ('{"foo":3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;)",
.result = R"(0
4
0
3
5)"}},
.categories{"JSON"}});
factory.registerAlias("visitParamExtractUInt", "simpleJSONExtractUInt");
}

View File

@ -21,7 +21,28 @@ using FunctionSimpleJSONHas = FunctionsStringSearch<ExtractParamImpl<NameSimpleJ
REGISTER_FUNCTION(VisitParamHas)
{
factory.registerFunction<FunctionSimpleJSONHas>();
factory.registerFunction<FunctionSimpleJSONHas>(FunctionDocumentation{
.description = "Checks whether there is a field named field_name. The result is UInt8.",
.syntax = "simpleJSONHas(json, field_name)",
.arguments
= {{"json", "The JSON in which the field is searched for. String."},
{"field_name", "The name of the field to search for. String literal."}},
.returned_value = "It returns 1 if the field exists, 0 otherwise.",
.examples
= {{.name = "simple",
.query = R"(CREATE TABLE jsons
(
json String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONHas(json, 'foo') FROM jsons;
SELECT simpleJSONHas(json, 'bar') FROM jsons;)",
.result = R"(1
0)"}},
.categories{"JSON"}});
factory.registerAlias("visitParamHas", "simpleJSONHas");
}

View File

@ -449,6 +449,7 @@ bool ReadWriteBufferFromHTTP::nextImpl()
if (http_skip_not_found_url && e.getHTTPStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND)
{
next_result = false;
has_not_found_url = true;
return;
}
@ -740,4 +741,3 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::parseFileInfo(con
}
}

View File

@ -79,6 +79,7 @@ private:
const bool use_external_buffer;
const bool http_skip_not_found_url;
bool has_not_found_url = false;
std::function<void(std::ostream &)> out_stream_callback;
@ -183,6 +184,8 @@ public:
std::optional<time_t> tryGetLastModificationTime();
bool hasNotFoundURL() const { return has_not_found_url; }
HTTPFileInfo getFileInfo();
static HTTPFileInfo parseFileInfo(const Poco::Net::HTTPResponse & response, size_t requested_range_begin);
};

View File

@ -1111,6 +1111,7 @@ void NO_INLINE Aggregator::executeImpl(
bool all_keys_are_const,
AggregateDataPtr overflow_row) const
{
bool use_compiled_functions = false;
if (!no_more_keys)
{
/// Prefetching doesn't make sense for small hash tables, because they fit in caches entirely.
@ -1118,33 +1119,47 @@ void NO_INLINE Aggregator::executeImpl(
&& (method.data.getBufferSizeInBytes() > min_bytes_for_prefetch);
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions))
{
if (prefetch)
executeImplBatch<false, true, true>(
method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row);
else
executeImplBatch<false, true, false>(
method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row);
}
else
use_compiled_functions = compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions);
#endif
{
if (prefetch)
executeImplBatch<false, false, true>(
method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row);
else
executeImplBatch<false, false, false>(
method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row);
}
if (prefetch)
executeImplBatch<false, true>(
method,
state,
aggregates_pool,
row_begin,
row_end,
aggregate_instructions,
all_keys_are_const,
use_compiled_functions,
overflow_row);
else
executeImplBatch<false, false>(
method,
state,
aggregates_pool,
row_begin,
row_end,
aggregate_instructions,
all_keys_are_const,
use_compiled_functions,
overflow_row);
}
else
{
executeImplBatch<true, false, false>(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row);
executeImplBatch<true, false>(
method,
state,
aggregates_pool,
row_begin,
row_end,
aggregate_instructions,
all_keys_are_const,
use_compiled_functions,
overflow_row);
}
}
template <bool no_more_keys, bool use_compiled_functions, bool prefetch, typename Method, typename State>
template <bool no_more_keys, bool prefetch, typename Method, typename State>
void NO_INLINE Aggregator::executeImplBatch(
Method & method,
State & state,
@ -1153,6 +1168,7 @@ void NO_INLINE Aggregator::executeImplBatch(
size_t row_end,
AggregateFunctionInstruction * aggregate_instructions,
bool all_keys_are_const,
bool use_compiled_functions [[maybe_unused]],
AggregateDataPtr overflow_row) const
{
using KeyHolder = decltype(state.getKeyHolder(0, std::declval<Arena &>()));
@ -1284,7 +1300,7 @@ void NO_INLINE Aggregator::executeImplBatch(
aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (use_compiled_functions)
{
const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions;
compiled_aggregate_functions.create_aggregate_states_function(aggregate_data);
@ -1293,20 +1309,6 @@ void NO_INLINE Aggregator::executeImplBatch(
static constexpr bool skip_compiled_aggregate_functions = true;
createAggregateStates<skip_compiled_aggregate_functions>(aggregate_data);
}
#if defined(MEMORY_SANITIZER)
/// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place.
for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index)
{
if (!is_aggregate_function_compiled[aggregate_function_index])
continue;
auto aggregate_data_with_offset = aggregate_data + offsets_of_aggregate_states[aggregate_function_index];
auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData();
__msan_unpoison(aggregate_data_with_offset, data_size);
}
#endif
}
else
#endif
@ -1339,7 +1341,7 @@ void NO_INLINE Aggregator::executeImplBatch(
}
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (use_compiled_functions)
{
std::vector<ColumnData> columns_data;
@ -1372,9 +1374,8 @@ void NO_INLINE Aggregator::executeImplBatch(
for (size_t i = 0; i < aggregate_functions.size(); ++i)
{
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (is_aggregate_function_compiled[i])
continue;
if (use_compiled_functions && is_aggregate_function_compiled[i])
continue;
#endif
AggregateFunctionInstruction * inst = aggregate_instructions + i;
@ -1387,18 +1388,19 @@ void NO_INLINE Aggregator::executeImplBatch(
}
template <bool use_compiled_functions>
void NO_INLINE Aggregator::executeWithoutKeyImpl(
AggregatedDataWithoutKey & res,
size_t row_begin, size_t row_end,
size_t row_begin,
size_t row_end,
AggregateFunctionInstruction * aggregate_instructions,
Arena * arena) const
Arena * arena,
bool use_compiled_functions [[maybe_unused]]) const
{
if (row_begin == row_end)
return;
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (use_compiled_functions)
{
std::vector<ColumnData> columns_data;
@ -1418,20 +1420,6 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl(
auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place;
add_into_aggregate_states_function_single_place(row_begin, row_end, columns_data.data(), res);
#if defined(MEMORY_SANITIZER)
/// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place.
for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index)
{
if (!is_aggregate_function_compiled[aggregate_function_index])
continue;
auto aggregate_data_with_offset = res + offsets_of_aggregate_states[aggregate_function_index];
auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData();
__msan_unpoison(aggregate_data_with_offset, data_size);
}
#endif
}
#endif
@ -1439,13 +1427,10 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl(
for (size_t i = 0; i < aggregate_functions.size(); ++i)
{
AggregateFunctionInstruction * inst = aggregate_instructions + i;
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (is_aggregate_function_compiled[i])
continue;
if (use_compiled_functions && is_aggregate_function_compiled[i])
continue;
#endif
addBatchSinglePlace(row_begin, row_end, inst, res + inst->state_offset, arena);
}
}
@ -1704,16 +1689,14 @@ bool Aggregator::executeOnBlock(Columns columns,
if (result.type == AggregatedDataVariants::Type::without_key)
{
/// TODO: Enable compilation after investigation
// #if USE_EMBEDDED_COMPILER
// if (compiled_aggregate_functions_holder)
// {
// executeWithoutKeyImpl<true>(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool);
// }
// else
// #endif
{
executeWithoutKeyImpl<false>(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool);
}
bool use_compiled_functions = false;
executeWithoutKeyImpl(
result.without_key,
row_begin,
row_end,
aggregate_functions_instructions.data(),
result.aggregates_pool,
use_compiled_functions);
}
else
{
@ -1965,19 +1948,13 @@ Aggregator::convertToBlockImpl(Method & method, Table & data, Arena * arena, Are
ConvertToBlockRes<return_single_block> res;
bool use_compiled_functions = false;
if (final)
{
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder)
{
static constexpr bool use_compiled_functions = !Method::low_cardinality_optimization;
res = convertToBlockImplFinal<Method, use_compiled_functions, return_single_block>(method, data, arena, aggregates_pools, rows);
}
else
use_compiled_functions = compiled_aggregate_functions_holder != nullptr && !Method::low_cardinality_optimization;
#endif
{
res = convertToBlockImplFinal<Method, false, return_single_block>(method, data, arena, aggregates_pools, rows);
}
res = convertToBlockImplFinal<Method, return_single_block>(method, data, arena, aggregates_pools, use_compiled_functions, rows);
}
else
{
@ -2059,8 +2036,12 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu
}
template <bool use_compiled_functions>
Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data [[maybe_unused]]) const
Block Aggregator::insertResultsIntoColumns(
PaddedPODArray<AggregateDataPtr> & places,
OutputBlockColumns && out_cols,
Arena * arena,
bool has_null_key_data [[maybe_unused]],
bool use_compiled_functions [[maybe_unused]]) const
{
std::exception_ptr exception;
size_t aggregate_functions_destroy_index = 0;
@ -2068,7 +2049,7 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & pl
try
{
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (use_compiled_functions)
{
/** For JIT compiled functions we need to resize columns before pass them into compiled code.
* insert_aggregates_into_columns_function function does not throw exception.
@ -2098,14 +2079,13 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & pl
for (; aggregate_functions_destroy_index < params.aggregates_size;)
{
if constexpr (use_compiled_functions)
#if USE_EMBEDDED_COMPILER
if (use_compiled_functions && is_aggregate_function_compiled[aggregate_functions_destroy_index])
{
if (is_aggregate_function_compiled[aggregate_functions_destroy_index])
{
++aggregate_functions_destroy_index;
continue;
}
++aggregate_functions_destroy_index;
continue;
}
#endif
auto & final_aggregate_column = out_cols.final_aggregate_columns[aggregate_functions_destroy_index];
size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index];
@ -2127,14 +2107,13 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & pl
for (; aggregate_functions_destroy_index < params.aggregates_size; ++aggregate_functions_destroy_index)
{
if constexpr (use_compiled_functions)
#if USE_EMBEDDED_COMPILER
if (use_compiled_functions && is_aggregate_function_compiled[aggregate_functions_destroy_index])
{
if (is_aggregate_function_compiled[aggregate_functions_destroy_index])
{
++aggregate_functions_destroy_index;
continue;
}
++aggregate_functions_destroy_index;
continue;
}
#endif
size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index];
aggregate_functions[aggregate_functions_destroy_index]->destroyBatch(0, places.size(), places.data(), offset);
@ -2146,9 +2125,9 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & pl
return finalizeBlock(params, getHeader(/* final */ true), std::move(out_cols), /* final */ true, places.size());
}
template <typename Method, bool use_compiled_functions, bool return_single_block, typename Table>
Aggregator::ConvertToBlockRes<return_single_block> NO_INLINE
Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const
template <typename Method, bool return_single_block, typename Table>
Aggregator::ConvertToBlockRes<return_single_block> NO_INLINE Aggregator::convertToBlockImplFinal(
Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool use_compiled_functions [[maybe_unused]], size_t) const
{
/// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated
const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1;
@ -2204,7 +2183,8 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
{
if (places.size() >= max_block_size)
{
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data));
res.emplace_back(
insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions));
places.clear();
out_cols.reset();
has_null_key_data = false;
@ -2214,12 +2194,13 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena
if constexpr (return_single_block)
{
return insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data);
return insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions);
}
else
{
if (out_cols.has_value())
res.emplace_back(insertResultsIntoColumns<use_compiled_functions>(places, std::move(out_cols.value()), arena, has_null_key_data));
res.emplace_back(
insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions));
return res;
}
}
@ -2609,8 +2590,9 @@ void NO_INLINE Aggregator::mergeDataNullKey(
}
}
template <typename Method, bool use_compiled_functions, bool prefetch, typename Table>
void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const
template <typename Method, bool prefetch, typename Table>
void NO_INLINE
Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]]) const
{
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
@ -2637,7 +2619,7 @@ void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, A
table_src.clearAndShrink();
#if USE_EMBEDDED_COMPILER
if constexpr (use_compiled_functions)
if (use_compiled_functions)
{
const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions;
compiled_functions.merge_aggregate_states_function(dst_places.data(), src_places.data(), dst_places.size());
@ -2787,26 +2769,16 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
if (!no_more_keys)
{
bool use_compiled_functions = false;
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder)
{
if (prefetch)
mergeDataImpl<Method, true, true>(
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool);
else
mergeDataImpl<Method, true, false>(
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool);
}
else
use_compiled_functions = compiled_aggregate_functions_holder != nullptr;
#endif
{
if (prefetch)
mergeDataImpl<Method, false, true>(
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool);
else
mergeDataImpl<Method, false, false>(
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool);
}
if (prefetch)
mergeDataImpl<Method, true>(
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, use_compiled_functions);
else
mergeDataImpl<Method, false>(
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, use_compiled_functions);
}
else if (res->without_key)
{
@ -2851,26 +2823,22 @@ void NO_INLINE Aggregator::mergeBucketImpl(
return;
AggregatedDataVariants & current = *data[result_num];
bool use_compiled_functions = false;
#if USE_EMBEDDED_COMPILER
if (compiled_aggregate_functions_holder)
{
if (prefetch)
mergeDataImpl<Method, true, true>(
getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena);
else
mergeDataImpl<Method, true, false>(
getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena);
}
else
use_compiled_functions = compiled_aggregate_functions_holder != nullptr;
#endif
{
if (prefetch)
mergeDataImpl<Method, false, true>(
getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena);
else
mergeDataImpl<Method, false, false>(
getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena);
}
if (prefetch)
mergeDataImpl<Method, true>(
getDataVariant<Method>(*res).data.impls[bucket],
getDataVariant<Method>(current).data.impls[bucket],
arena,
use_compiled_functions);
else
mergeDataImpl<Method, false>(
getDataVariant<Method>(*res).data.impls[bucket],
getDataVariant<Method>(current).data.impls[bucket],
arena,
use_compiled_functions);
}
}
@ -2938,11 +2906,12 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData
return non_empty_data;
}
template <bool no_more_keys, typename State, typename Table>
template <typename State, typename Table>
void NO_INLINE Aggregator::mergeStreamsImplCase(
Arena * aggregates_pool,
State & state,
Table & data,
bool no_more_keys,
AggregateDataPtr overflow_row,
size_t row_begin,
size_t row_end,
@ -2954,36 +2923,34 @@ void NO_INLINE Aggregator::mergeStreamsImplCase(
if (!arena_for_keys)
arena_for_keys = aggregates_pool;
for (size_t i = row_begin; i < row_end; ++i)
if (no_more_keys)
{
AggregateDataPtr aggregate_data = nullptr;
if constexpr (!no_more_keys)
for (size_t i = row_begin; i < row_end; i++)
{
auto emplace_result = state.emplaceKey(data, i, *arena_for_keys); // NOLINT
if (emplace_result.isInserted())
auto find_result = state.findKey(data, i, *arena_for_keys);
/// aggregate_data == nullptr means that the new key did not fit in the hash table because of no_more_keys.
AggregateDataPtr value = find_result.isFound() ? find_result.getMapped() : overflow_row;
places[i] = value;
}
}
else
{
for (size_t i = row_begin; i < row_end; i++)
{
auto emplace_result = state.emplaceKey(data, i, *arena_for_keys);
if (!emplace_result.isInserted())
places[i] = emplace_result.getMapped();
else
{
emplace_result.setMapped(nullptr);
aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
AggregateDataPtr aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states);
createAggregateStates(aggregate_data);
emplace_result.setMapped(aggregate_data);
places[i] = aggregate_data;
}
else
aggregate_data = emplace_result.getMapped();
}
else
{
auto find_result = state.findKey(data, i, *arena_for_keys);
if (find_result.isFound())
aggregate_data = find_result.getMapped();
}
/// aggregate_data == nullptr means that the new key did not fit in the hash table because of no_more_keys.
AggregateDataPtr value = aggregate_data ? aggregate_data : overflow_row;
places[i] = value;
}
for (size_t j = 0; j < params.aggregates_size; ++j)
@ -3037,22 +3004,16 @@ void NO_INLINE Aggregator::mergeStreamsImpl(
if (use_cache)
{
typename Method::State state(key_columns, key_sizes, aggregation_state_cache);
if (!no_more_keys)
mergeStreamsImplCase<false>(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys);
else
mergeStreamsImplCase<true>(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys);
mergeStreamsImplCase(
aggregates_pool, state, data, no_more_keys, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys);
consecutive_keys_cache_stats.update(row_end - row_begin, state.getCacheMissesSinceLastReset());
}
else
{
typename Method::StateNoCache state(key_columns, key_sizes, aggregation_state_cache);
if (!no_more_keys)
mergeStreamsImplCase<false>(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys);
else
mergeStreamsImplCase<true>(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys);
mergeStreamsImplCase(
aggregates_pool, state, data, no_more_keys, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys);
}
}
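
The hunks above all apply the same refactoring: `use_compiled_functions` (and, for stream merging, `no_more_keys`) stops being a compile-time template parameter and becomes an ordinary runtime `bool`, so `if constexpr (...)` guarded by `#if USE_EMBEDDED_COMPILER` turns into a plain `if`. A minimal, self-contained sketch of that pattern follows; the function name and messages are illustrative only, not the ClickHouse code:

```cpp
// Toy illustration of the template-bool -> runtime-bool refactoring.
#include <cstdio>

#define USE_EMBEDDED_COMPILER 1

// Before: template <bool use_compiled_functions> void executeBatch();
// After: the flag is an ordinary argument checked at run time.
void executeBatch(bool use_compiled_functions [[maybe_unused]])
{
#if USE_EMBEDDED_COMPILER
    if (use_compiled_functions)
    {
        std::puts("JIT-compiled aggregation path");
        return;
    }
#endif
    std::puts("generic aggregation path");
}

int main()
{
    executeBatch(false); // generic path
    executeBatch(true);  // JIT path, only taken when the embedded compiler is available
}
```

The trade-off is fewer template instantiations in exchange for a runtime branch, which is presumably cheap relative to the per-row aggregation work.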

View File

@ -1395,7 +1395,7 @@ private:
AggregateDataPtr overflow_row) const;
/// Specialization for a particular value of no_more_keys.
template <bool no_more_keys, bool use_compiled_functions, bool prefetch, typename Method, typename State>
template <bool no_more_keys, bool prefetch, typename Method, typename State>
void executeImplBatch(
Method & method,
State & state,
@ -1404,16 +1404,17 @@ private:
size_t row_end,
AggregateFunctionInstruction * aggregate_instructions,
bool all_keys_are_const,
bool use_compiled_functions,
AggregateDataPtr overflow_row) const;
/// For case when there are no keys (all aggregate into one row).
template <bool use_compiled_functions>
void executeWithoutKeyImpl(
AggregatedDataWithoutKey & res,
size_t row_begin,
size_t row_end,
AggregateFunctionInstruction * aggregate_instructions,
Arena * arena) const;
Arena * arena,
bool use_compiled_functions) const;
template <typename Method>
void writeToTemporaryFileImpl(
@ -1429,8 +1430,8 @@ private:
Arena * arena) const;
/// Merge data from hash table `src` into `dst`.
template <typename Method, bool use_compiled_functions, bool prefetch, typename Table>
void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const;
template <typename Method, bool prefetch, typename Table>
void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions) const;
/// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`.
template <typename Method, typename Table>
@ -1467,12 +1468,16 @@ private:
MutableColumns & final_aggregate_columns,
Arena * arena) const;
template <bool use_compiled_functions>
Block insertResultsIntoColumns(PaddedPODArray<AggregateDataPtr> & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data) const;
Block insertResultsIntoColumns(
PaddedPODArray<AggregateDataPtr> & places,
OutputBlockColumns && out_cols,
Arena * arena,
bool has_null_key_data,
bool use_compiled_functions) const;
template <typename Method, bool use_compiled_functions, bool return_single_block, typename Table>
ConvertToBlockRes<return_single_block>
convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows) const;
template <typename Method, bool return_single_block, typename Table>
ConvertToBlockRes<return_single_block> convertToBlockImplFinal(
Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool use_compiled_functions, size_t rows) const;
template <bool return_single_block, typename Method, typename Table>
ConvertToBlockRes<return_single_block>
@ -1508,11 +1513,12 @@ private:
bool final,
ThreadPool * thread_pool) const;
template <bool no_more_keys, typename State, typename Table>
template <typename State, typename Table>
void mergeStreamsImplCase(
Arena * aggregates_pool,
State & state,
Table & data,
bool no_more_keys,
AggregateDataPtr overflow_row,
size_t row_begin,
size_t row_end,

View File

@ -1,15 +1,14 @@
#pragma once
#include <Core/UUID.h>
#include <Databases/IDatabase.h>
#include <Databases/TablesDependencyGraph.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/StorageID.h>
#include <Databases/TablesDependencyGraph.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage_fwd.h>
#include "Common/NamePrompter.h"
#include <Common/NamePrompter.h>
#include <Common/SharedMutex.h>
#include "Storages/IStorage.h"
#include "Databases/IDatabase.h"
#include <boost/noncopyable.hpp>
#include <Poco/Logger.h>

View File

@ -1,21 +1,18 @@
#include <Compression/CompressedWriteBuffer.h>
#include <Formats/NativeWriter.h>
#include <Formats/formatBlock.h>
#include <Interpreters/Context.h>
#include <Interpreters/GraceHashJoin.h>
#include <Interpreters/HashJoin.h>
#include <Interpreters/TableJoin.h>
#include <Formats/NativeWriter.h>
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Compression/CompressedWriteBuffer.h>
#include <base/FnTraits.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <Common/thread_local_rng.h>
#include <base/FnTraits.h>
#include <fmt/format.h>
#include <Formats/formatBlock.h>
#include <numeric>
#include <fmt/format.h>
namespace CurrentMetrics

View File

@ -2,11 +2,11 @@
#include <boost/noncopyable.hpp>
#include <Interpreters/Context.h>
#include <Disks/TemporaryFileOnDisk.h>
#include <Core/Block.h>
#include <Disks/IVolume.h>
#include <Common/CurrentMetrics.h>
#include <Disks/TemporaryFileOnDisk.h>
#include <Interpreters/Cache/FileSegment.h>
#include <Common/CurrentMetrics.h>
namespace CurrentMetrics

View File

@ -304,6 +304,9 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
log_settings.turn_off_logger = DB::TextLog::shouldTurnOffLogger();
log_settings.database = config.getString("text_log.database", "system");
log_settings.table = config.getString("text_log.table", "text_log");
split->addTextLog(DB::TextLog::getLogQueue(log_settings), text_log_level);
}
#endif

View File

@ -62,7 +62,7 @@ protected:
settings.ostr << '.';
}
chassert(table);
chassert(table != nullptr, "Table is empty for the ASTQueryWithTableAndOutputImpl.");
table->formatImpl(settings, state, frame);
}
};

View File

@ -85,6 +85,15 @@ using ASTShowCreateDictionaryQuery = ASTQueryWithTableAndOutputImpl<ASTShowCreat
class ASTExistsDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTExistsDatabaseQueryIDAndQueryNames>
{
public:
ASTPtr clone() const override
{
auto res = std::make_shared<ASTExistsDatabaseQuery>(*this);
res->children.clear();
cloneTableOptions(*res);
return res;
}
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
{
@ -98,6 +107,15 @@ protected:
class ASTShowCreateDatabaseQuery : public ASTQueryWithTableAndOutputImpl<ASTShowCreateDatabaseQueryIDAndQueryNames>
{
public:
ASTPtr clone() const override
{
auto res = std::make_shared<ASTShowCreateDatabaseQuery>(*this);
res->children.clear();
cloneTableOptions(*res);
return res;
}
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override
{

View File

@ -3,6 +3,8 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
#include <Interpreters/Context.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/WindowNode.h>

View File

@ -8,6 +8,7 @@
#include <IO/Operators.h>
#include <Interpreters/Aggregator.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Processors/Merges/AggregatingSortedTransform.h>
#include <Processors/Merges/FinishAggregatingInOrderTransform.h>
#include <Processors/QueryPlan/AggregatingStep.h>

View File

@ -1,10 +1,11 @@
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/ExpressionActions.h>
#include <Processors/QueryPlan/AggregatingStep.h>
#include <Processors/QueryPlan/CubeStep.h>
#include <Processors/Transforms/CubeTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/QueryPlan/AggregatingStep.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
namespace DB
{

View File

@ -1,6 +1,7 @@
#include <memory>
#include <stdexcept>
#include <IO/Operators.h>
#include <Interpreters/Context.h>
#include <Processors/Merges/MergingSortedTransform.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Processors/Transforms/FinishSortingTransform.h>

View File

@ -25,6 +25,8 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_)
{"unreserved_space", std::make_shared<DataTypeUInt64>(), "Free space which is not taken by reservations (free_space minus the size of reservations taken by merges, inserts, and other disk write operations currently running)."},
{"keep_free_space", std::make_shared<DataTypeUInt64>(), "Amount of disk space that should stay free on disk in bytes. Defined in the keep_free_space_bytes parameter of disk configuration."},
{"type", std::make_shared<DataTypeString>(), "The disk type which tells where this disk stores the data - RAM, local drive or remote storage."},
{"object_storage_type", std::make_shared<DataTypeString>(), "Type of object storage if disk type is object_storage"},
{"metadata_type", std::make_shared<DataTypeString>(), "Type of metadata storage if disk type is object_storage"},
{"is_encrypted", std::make_shared<DataTypeUInt8>(), "Flag which shows whether this disk ecrypts the underlying data. "},
{"is_read_only", std::make_shared<DataTypeUInt8>(), "Flag which indicates that you can only perform read operations with this disk."},
{"is_write_once", std::make_shared<DataTypeUInt8>(), "Flag which indicates if disk is write-once. Which means that it does support BACKUP to this disk, but does not support INSERT into MergeTree table on this disk."},
@ -53,6 +55,8 @@ Pipe StorageSystemDisks::read(
MutableColumnPtr col_unreserved = ColumnUInt64::create();
MutableColumnPtr col_keep = ColumnUInt64::create();
MutableColumnPtr col_type = ColumnString::create();
MutableColumnPtr col_object_storage_type = ColumnString::create();
MutableColumnPtr col_metadata_type = ColumnString::create();
MutableColumnPtr col_is_encrypted = ColumnUInt8::create();
MutableColumnPtr col_is_read_only = ColumnUInt8::create();
MutableColumnPtr col_is_write_once = ColumnUInt8::create();
@ -69,7 +73,9 @@ Pipe StorageSystemDisks::read(
col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits<UInt64>::max()));
col_keep->insert(disk_ptr->getKeepingFreeSpace());
auto data_source_description = disk_ptr->getDataSourceDescription();
col_type->insert(data_source_description.toString());
col_type->insert(magic_enum::enum_name(data_source_description.type));
col_object_storage_type->insert(magic_enum::enum_name(data_source_description.object_storage_type));
col_metadata_type->insert(magic_enum::enum_name(data_source_description.metadata_type));
col_is_encrypted->insert(data_source_description.is_encrypted);
col_is_read_only->insert(disk_ptr->isReadOnly());
col_is_write_once->insert(disk_ptr->isWriteOnce());
@ -91,6 +97,8 @@ Pipe StorageSystemDisks::read(
res_columns.emplace_back(std::move(col_unreserved));
res_columns.emplace_back(std::move(col_keep));
res_columns.emplace_back(std::move(col_type));
res_columns.emplace_back(std::move(col_object_storage_type));
res_columns.emplace_back(std::move(col_metadata_type));
res_columns.emplace_back(std::move(col_is_encrypted));
res_columns.emplace_back(std::move(col_is_read_only));
res_columns.emplace_back(std::move(col_is_write_once));

View File

@ -51,7 +51,7 @@ class Queue:
label: str
def get_scales() -> Tuple[int, int]:
def get_scales(runner_type: str) -> Tuple[int, int]:
"returns the multipliers for scaling down and up ASG by types"
# Scaling down is quicker on the lack of running jobs than scaling up on
# queue
@ -63,8 +63,12 @@ def get_scales() -> Tuple[int, int]:
# 10. I am trying 7 now.
# 7 still looks a bit slow, so I try 6
# Let's have it the same as the other ASG
#
# All types of style-checkers should be added very quickly to not block the workflows
# UPDATE THE COMMENT ON CHANGES
scale_up = 3
if "style" in runner_type:
scale_up = 1
return scale_down, scale_up
@ -95,7 +99,7 @@ def set_capacity(
continue
raise ValueError("Queue status is not in ['in_progress', 'queued']")
scale_down, scale_up = get_scales()
scale_down, scale_up = get_scales(runner_type)
# With lifecycle hooks some instances are actually free because some of
# them are in 'Terminating:Wait' state
effective_capacity = max(
@ -138,7 +142,7 @@ def set_capacity(
logging.info(
"The ASG %s capacity will be increased to %s, current capacity=%s, "
"effective capacity=%sm maximum capacity=%s, running jobs=%s, queue size=%s",
"effective capacity=%s, maximum capacity=%s, running jobs=%s, queue size=%s",
asg["AutoScalingGroupName"],
desired_capacity,
effective_capacity,

View File

@ -80,7 +80,7 @@ class TestSetCapacity(unittest.TestCase):
),
TestCase("increase-1", 1, 13, 20, [Queue("queued", 23, "increase-1")], 17),
TestCase(
"style-checker", 1, 13, 20, [Queue("queued", 33, "style-checker")], 20
"style-checker", 1, 13, 20, [Queue("queued", 19, "style-checker")], 19
),
TestCase("increase-2", 1, 13, 20, [Queue("queued", 18, "increase-2")], 15),
TestCase("increase-3", 1, 13, 20, [Queue("queued", 183, "increase-3")], 20),

View File

@ -33,7 +33,7 @@ def start_mock_servers(cluster, script_dir, mocks, timeout=100):
cluster.exec_in_container(
container_id,
["python", server_name, str(port)],
["python3", server_name, str(port)],
detach=True,
)

View File

@ -8,9 +8,16 @@
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
</backup_disk_s3_plain>
<backup_disk_local_plain>
<type>object_storage</type>
<object_storage_type>local_blob_storage</object_storage_type>
<metadata_type>plain</metadata_type>
<path>/local_plain/</path>
</backup_disk_local_plain>
</disks>
</storage_configuration>
<backups>
<allowed_disk>backup_disk_s3_plain</allowed_disk>
<allowed_disk>backup_disk_local_plain</allowed_disk>
</backups>
</clickhouse>

View File

@ -21,16 +21,55 @@ def start_cluster():
cluster.shutdown()
s3_disk_def = """disk(type=s3_plain,
endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{}/',
access_key_id='minio',
secret_access_key='minio123');"""
local_disk_def = "disk(type=object_storage, object_storage_type = 'local_blob_storage', metadata_type = 'plain', path = '/local_plain/{}/');"
@pytest.mark.parametrize(
"table_name,backup_name,storage_policy,min_bytes_for_wide_part",
"table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part",
[
pytest.param(
"compact", "backup_compact", "s3_backup_compact", int(1e9), id="compact"
"compact",
"backup_compact_s3",
"backup_disk_s3_plain",
s3_disk_def,
int(1e9),
id="compact",
),
pytest.param(
"wide",
"backup_wide_s3",
"backup_disk_s3_plain",
s3_disk_def,
int(0),
id="wide",
),
pytest.param(
"compact",
"backup_compact_local",
"backup_disk_local_plain",
local_disk_def,
int(1e9),
id="compact",
),
pytest.param(
"wide",
"backup_wide_local",
"backup_disk_local_plain",
local_disk_def,
int(0),
id="wide",
),
pytest.param("wide", "backup_wide", "s3_backup_wide", int(0), id="wide"),
],
)
def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide_part):
def test_attach_part(
table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part
):
disk_definition = disk_def.format(backup_name)
node.query(
f"""
-- Catch any errors (NOTE: warnings are ok)
@ -45,7 +84,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide
settings min_bytes_for_wide_part={min_bytes_for_wide_part}
as select number%5 part, number key from numbers(100);
backup table ordinary_db.{table_name} TO Disk('backup_disk_s3_plain', '{backup_name}') settings deduplicate_files=0;
backup table ordinary_db.{table_name} TO Disk('{storage_policy}', '{backup_name}') settings deduplicate_files=0;
drop table ordinary_db.{table_name};
attach table ordinary_db.{table_name} (part UInt8, key UInt64)
@ -53,10 +92,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide
order by key partition by part
settings
max_suspicious_broken_parts=0,
disk=disk(type=s3_plain,
endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/',
access_key_id='minio',
secret_access_key='minio123');
disk={disk_definition}
"""
)

View File

@ -124,15 +124,17 @@ def check_backup_and_restore(
def check_system_tables(backup_query_id=None):
disks = [
tuple(disk.split("\t"))
for disk in node.query("SELECT name, type FROM system.disks").split("\n")
for disk in node.query(
"SELECT name, type, object_storage_type, metadata_type FROM system.disks"
).split("\n")
if disk
]
expected_disks = (
("default", "local"),
("disk_s3", "s3"),
("disk_s3_cache", "s3"),
("disk_s3_other_bucket", "s3"),
("disk_s3_plain", "s3_plain"),
("default", "Local", "None", "None"),
("disk_s3", "ObjectStorage", "S3", "Local"),
("disk_s3_cache", "ObjectStorage", "S3", "Local"),
("disk_s3_other_bucket", "ObjectStorage", "S3", "Local"),
("disk_s3_plain", "ObjectStorage", "S3", "Plain"),
)
assert len(expected_disks) == len(disks)
for expected_disk in expected_disks:

View File

@ -3,10 +3,10 @@ from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
disk_types = {
"default": "local",
"disk_s3": "s3",
"disk_hdfs": "hdfs",
"disk_encrypted": "s3",
"default": "Local",
"disk_s3": "S3",
"disk_hdfs": "HDFS",
"disk_encrypted": "S3",
}
@ -52,22 +52,63 @@ def test_different_types(cluster):
), f"{fields[name_col_ix]} expected to be non-encrypted!"
def test_different_types(cluster):
node = cluster.instances["node"]
response = TSV.toMat(node.query("SELECT * FROM system.disks FORMAT TSVWithNames"))
assert len(response) > len(disk_types) # at least one extra line for header
name_col_ix = response[0].index("name")
type_col_ix = response[0].index("type")
encrypted_col_ix = response[0].index("is_encrypted")
for fields in response[1:]: # skip header
assert len(fields) >= 7
expected_disk_type = disk_types.get(fields[name_col_ix], "UNKNOWN")
if expected_disk_type != "Local":
disk_type = fields[response[0].index("object_storage_type")]
else:
disk_type = fields[type_col_ix]
assert (
expected_disk_type == disk_type
), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!"
if "encrypted" in fields[name_col_ix]:
assert (
fields[encrypted_col_ix] == "1"
), f"{fields[name_col_ix]} expected to be encrypted!"
else:
assert (
fields[encrypted_col_ix] == "0"
), f"{fields[name_col_ix]} expected to be non-encrypted!"
def test_select_by_type(cluster):
node = cluster.instances["node"]
for name, disk_type in list(disk_types.items()):
if disk_type != "s3":
if disk_type == "Local":
assert (
node.query(
"SELECT name FROM system.disks WHERE type='" + disk_type + "'"
)
== name + "\n"
)
else:
elif disk_type == "S3":
assert (
node.query(
"SELECT name FROM system.disks WHERE type='"
"SELECT name FROM system.disks WHERE object_storage_type='"
+ disk_type
+ "' ORDER BY name"
)
== "disk_encrypted\ndisk_s3\n"
)
else:
assert (
node.query(
"SELECT name FROM system.disks WHERE object_storage_type='"
+ disk_type
+ "'"
)
== name + "\n"
)

View File

@ -4,10 +4,10 @@ from helpers.test_tools import TSV
from pyhdfs import HdfsClient
disk_types = {
"default": "local",
"disk_s3": "s3",
"disk_hdfs": "hdfs",
"disk_encrypted": "s3",
"default": "Local",
"disk_s3": "S3",
"disk_hdfs": "HDFS",
"disk_encrypted": "S3",
}
@ -45,8 +45,15 @@ def test_different_types(cluster):
for fields in response[1:]: # skip header
assert len(fields) >= 7
expected_disk_type = disk_types.get(fields[name_col_ix], "UNKNOWN")
if expected_disk_type != "Local":
disk_type = fields[response[0].index("object_storage_type")]
else:
disk_type = fields[type_col_ix]
assert (
disk_types.get(fields[name_col_ix], "UNKNOWN") == fields[type_col_ix]
expected_disk_type == disk_type
), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!"
if "encrypted" in fields[name_col_ix]:
assert (
@ -60,22 +67,29 @@ def test_different_types(cluster):
def test_select_by_type(cluster):
node = cluster.instances["node"]
fs = HdfsClient(hosts=cluster.hdfs_ip)
for name, disk_type in list(disk_types.items()):
if disk_type != "s3":
if disk_type == "Local":
assert (
node.query(
"SELECT name FROM system.disks WHERE type='" + disk_type + "'"
)
== name + "\n"
)
else:
elif disk_type == "S3":
assert (
node.query(
"SELECT name FROM system.disks WHERE type='"
"SELECT name FROM system.disks WHERE object_storage_type='"
+ disk_type
+ "' ORDER BY name"
)
== "disk_encrypted\ndisk_s3\n"
)
else:
assert (
node.query(
"SELECT name FROM system.disks WHERE object_storage_type='"
+ disk_type
+ "'"
)
== name + "\n"
)

View File

@ -0,0 +1,9 @@
<clickhouse>
<s3>
<use_environment_credentials>1</use_environment_credentials>
</s3>
<placement>
<use_imds>0</use_imds>
<availability_zone>ci-placeholder</availability_zone>
</placement>
</clickhouse>

View File

@ -2,16 +2,14 @@ import pytest
from helpers.cluster import ClickHouseCluster
from helpers.mock_servers import start_mock_servers
import os
import time
METADATA_SERVER_HOSTNAME = "resolver"
METADATA_SERVER_HOSTNAME = "node_imds"
METADATA_SERVER_PORT = 8080
cluster = ClickHouseCluster(__file__)
node_imds = cluster.add_instance(
"node_imds",
with_minio=True,
main_configs=["configs/imds.xml"],
main_configs=["configs/imds_bootstrap.xml"],
env_variables={
"AWS_EC2_METADATA_SERVICE_ENDPOINT": f"http://{METADATA_SERVER_HOSTNAME}:{METADATA_SERVER_PORT}",
},
@ -32,10 +30,10 @@ node_missing_value = cluster.add_instance(
)
def start_metadata_server():
def start_metadata_server(started_cluster):
script_dir = os.path.join(os.path.dirname(__file__), "metadata_servers")
start_mock_servers(
cluster,
started_cluster,
script_dir,
[
(
@ -51,13 +49,17 @@ def start_metadata_server():
def start_cluster():
try:
cluster.start()
start_metadata_server()
yield
start_metadata_server(cluster)
yield cluster
finally:
cluster.shutdown()
def test_placement_info_from_imds():
with open(os.path.join(os.path.dirname(__file__), "configs/imds.xml"), "r") as f:
node_imds.replace_config(
"/etc/clickhouse-server/config.d/imds_bootstrap.xml", f.read()
)
node_imds.stop_clickhouse(kill=True)
node_imds.start_clickhouse()

View File

@ -0,0 +1,14 @@
Nullable(UInt64), non-null array
1 1
\N 0
Non-nullable UInt64, nullable array
0 0
1 1
2 1
Nullable(UInt64), nullable array
0 0
\N 1
1 1
All NULLs
0 0
\N 1

View File

@ -0,0 +1,39 @@
DROP TABLE IF EXISTS 00662_has_nullable;
SELECT 'Nullable(UInt64), non-null array';
CREATE TABLE 00662_has_nullable(a Nullable(UInt64)) ENGINE = Memory;
INSERT INTO 00662_has_nullable VALUES (1), (Null);
SELECT a, has([0, 1], a) FROM 00662_has_nullable;
DROP TABLE 00662_has_nullable;
--------------------------------------------------------------------------------
SELECT 'Non-nullable UInt64, nullable array';
CREATE TABLE 00662_has_nullable(a UInt64) ENGINE = Memory;
INSERT INTO 00662_has_nullable VALUES (0), (1), (2);
SELECT a, has([NULL, 1, 2], a) FROM 00662_has_nullable;
DROP TABLE 00662_has_nullable;
--------------------------------------------------------------------------------
SELECT 'Nullable(UInt64), nullable array';
CREATE TABLE 00662_has_nullable(a Nullable(UInt64)) ENGINE = Memory;
INSERT INTO 00662_has_nullable VALUES (0), (Null), (1);
SELECT a, has([NULL, 1, 2], a) FROM 00662_has_nullable;
DROP TABLE 00662_has_nullable;
--------------------------------------------------------------------------------
SELECT 'All NULLs';
CREATE TABLE 00662_has_nullable(a Nullable(UInt64)) ENGINE = Memory;
INSERT INTO 00662_has_nullable VALUES (0), (Null);
SELECT a, has([NULL, NULL], a) FROM 00662_has_nullable;
DROP TABLE 00662_has_nullable;

View File

@ -3,6 +3,8 @@ CREATE TABLE too_many_parts (x UInt64) ENGINE = MergeTree ORDER BY tuple() SETTI
SYSTEM STOP MERGES too_many_parts;
SET max_block_size = 1, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0;
-- Avoid concurrent parts check to avoid flakiness
SET max_threads=1, max_insert_threads=1;
-- exception is not thrown if threshold is exceeded when multi-block INSERT is already started.
INSERT INTO too_many_parts SELECT * FROM numbers(10);

View File

@ -195,6 +195,8 @@ CREATE TABLE system.disks
`unreserved_space` UInt64,
`keep_free_space` UInt64,
`type` String,
`object_storage_type` String,
`metadata_type` String,
`is_encrypted` UInt8,
`is_read_only` UInt8,
`is_write_once` UInt8,

View File

@ -0,0 +1,20 @@
0
1
2
3
4
5
6
7
8
9
0
1
2
3
4
5
6
7
8
9

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_LOCAL -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet
$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet
$CLICKHOUSE_CLIENT -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet
$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet

View File

@ -644,14 +644,6 @@ shardNum
showCertificate
sigmoid
sign
simpleJSONExtractBool
simpleJSONExtractFloat
simpleJSONExtractInt
simpleJSONExtractRaw
simpleJSONExtractString
simpleJSONExtractUInt
simpleJSONHas
sin
sinh
sipHash128
sipHash128Keyed

View File

@ -1,2 +1,24 @@
11 queryfinish OK
11 querystart OK
"-- Attach MV to system.query_log and check that writing query_log will not fail\n\nset log_queries=1;","querystart","OK"
"-- Attach MV to system.query_log and check that writing query_log will not fail\n\nset log_queries=1;","queryfinish","OK"
"drop table if exists log_proxy_02572;","querystart","OK"
"drop table if exists log_proxy_02572;","queryfinish","OK"
"drop table if exists push_to_logs_proxy_mv_02572;","querystart","OK"
"drop table if exists push_to_logs_proxy_mv_02572;","queryfinish","OK"
"-- create log tables\nsystem flush logs;","querystart","OK"
"-- create log tables\nsystem flush logs;","queryfinish","OK"
"create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');","querystart","OK"
"create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');","queryfinish","OK"
"create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log;","querystart","OK"
"create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log;","queryfinish","OK"
"select 1 format Null;","querystart","OK"
"select 1 format Null;","queryfinish","OK"
"system flush logs;","querystart","OK"
"system flush logs;","queryfinish","OK"
"system flush logs;","querystart","OK"
"system flush logs;","queryfinish","OK"
"drop table log_proxy_02572;","querystart","OK"
"drop table log_proxy_02572;","queryfinish","OK"
"drop table push_to_logs_proxy_mv_02572;","querystart","OK"
"drop table push_to_logs_proxy_mv_02572;","queryfinish","OK"
"set log_queries=0;","querystart","OK"
"set log_queries=0;","queryfinish","OK"

View File

@ -21,10 +21,12 @@ system flush logs;
drop table log_proxy_02572;
drop table push_to_logs_proxy_mv_02572;
set log_queries=0;
system flush logs;
-- lower() to pass through clickhouse-test "exception" check
select count(), lower(type::String), errorCodeToName(exception_code)
select replaceAll(query, '\n', '\\n'), lower(type::String), errorCodeToName(exception_code)
from system.query_log
where current_database = currentDatabase()
group by 2, 3
order by 2;
order by event_time_microseconds
format CSV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env bash
## Note: The analyzer doesn't support JOIN with parallel replicas yet
# Tags: no-tsan, no-asan, no-msan
# It's not clear why distributed aggregation is much slower with sanitizers (https://github.com/ClickHouse/ClickHouse/issues/60625)
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -5,6 +5,6 @@
4 ttt
5 ttt
6 ttt
{"a":"1","obj":{"k1":1,"k2":null,"k3":null}}
{"a":"3","obj":{"k1":null,"k2":null,"k3":1}}
{"a":"1","obj":{"k1":1,"k2":null,"k3":null}}
1 [('k1',1)]
3 [('k3',1)]
1 [('k1',1)]

View File

@ -40,9 +40,9 @@ INSERT INTO t_mutations_subcolumns VALUES (2, '{"k2": 1}');
INSERT INTO t_mutations_subcolumns VALUES (3, '{"k3": 1}');
ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k2 = 1;
SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow;
SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a;
ALTER TABLE t_mutations_subcolumns DELETE WHERE isNull(obj.k1);
SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow;
SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a;
DROP TABLE t_mutations_subcolumns;

View File

@ -12,22 +12,34 @@ CONCURRENCY=200
echo "Creating $NUM_TABLES tables"
function get_done_or_die_trying()
{
# Sometimes curl produces errors like 'Recv failure: Connection reset by peer' and fails test, let's add a little bit of retries
for _ in $(seq 1 10)
do
curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "$1" &>/dev/null && return
done
echo "Cannot successfully make request"
exit 1
}
function init_table()
{
set -e
i=$1
curl $CLICKHOUSE_URL --silent --fail --show-error --data "CREATE TABLE test_02908_r1_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r1') ORDER BY tuple()" 2>&1
curl $CLICKHOUSE_URL --silent --fail --show-error --data "CREATE TABLE test_02908_r2_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r2') ORDER BY tuple()" 2>&1
curl $CLICKHOUSE_URL --silent --fail --show-error --data "CREATE TABLE test_02908_r3_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r3') ORDER BY tuple()" 2>&1
get_done_or_die_trying "CREATE TABLE test_02908_r1_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r1') ORDER BY tuple()"
get_done_or_die_trying "CREATE TABLE test_02908_r2_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r2') ORDER BY tuple()"
get_done_or_die_trying "CREATE TABLE test_02908_r3_$i (a UInt64) ENGINE=ReplicatedMergeTree('/02908/{database}/test_$i', 'r3') ORDER BY tuple()"
curl $CLICKHOUSE_URL --silent --fail --show-error --data "INSERT INTO test_02908_r1_$i SELECT rand64() FROM numbers(5);" 2>&1
get_done_or_die_trying "INSERT INTO test_02908_r1_$i SELECT rand64() FROM numbers(5);"
}
export init_table;
for i in `seq 1 $NUM_TABLES`;
for i in $(seq 1 $NUM_TABLES)
do
init_table $i &
init_table "$i" &
done
wait;
@ -35,15 +47,15 @@ wait;
echo "Making $CONCURRENCY requests to system.replicas"
for i in `seq 1 $CONCURRENCY`;
for i in $(seq 1 $CONCURRENCY)
do
curl $CLICKHOUSE_URL --silent --fail --show-error --data "SELECT * FROM system.replicas WHERE database=currentDatabase() FORMAT Null;" 2>&1 || echo "query $i failed" &
curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "SELECT * FROM system.replicas WHERE database=currentDatabase() FORMAT Null;" 2>&1 || echo "query $i failed" &
done
echo "Query system.replicas while waiting for other concurrent requests to finish"
# lost_part_count column is read from ZooKeeper
curl $CLICKHOUSE_URL --silent --fail --show-error --data "SELECT sum(lost_part_count) FROM system.replicas WHERE database=currentDatabase();" 2>&1;
curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "SELECT sum(lost_part_count) FROM system.replicas WHERE database=currentDatabase();" 2>&1;
# is_leader column is filled without ZooKeeper
curl $CLICKHOUSE_URL --silent --fail --show-error --data "SELECT sum(is_leader) FROM system.replicas WHERE database=currentDatabase();" 2>&1;
curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "SELECT sum(is_leader) FROM system.replicas WHERE database=currentDatabase();" 2>&1;
wait;
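
The helper introduced above exists because a single curl call can fail with a transient 'Connection reset by peer'; repeating the request a bounded number of times keeps the test from flaking. A standalone sketch of the same idea, with a short sleep between attempts added as my own assumption (CLICKHOUSE_URL is taken from the test environment):

#!/usr/bin/env bash
# Illustrative sketch, not part of the diff: retry a flaky HTTP query a bounded
# number of times, then report failure instead of looping forever.
run_query_with_retries()
{
    local query="$1"
    for _ in $(seq 1 10)
    do
        curl "$CLICKHOUSE_URL" --silent --fail --show-error --data "$query" &>/dev/null && return 0
        sleep 0.5   # added here for illustration; the test retries immediately
    done
    echo "Cannot successfully make request: $query" >&2
    return 1
}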

View File

@ -1,5 +1,12 @@
-- Tags: zookeeper, no-replicated-database
#!/usr/bin/env bash
# Tags: zookeeper, no-replicated-database
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --multiline --multiquery -q """
drop table if exists forget_partition;
create table forget_partition
@ -16,7 +23,12 @@ insert into forget_partition select number, '2024-01-01' + interval number day,
alter table forget_partition drop partition '20240101';
alter table forget_partition drop partition '20240102';
"""
# DROP PARTITION does not wait for a part to be removed from memory due to possible concurrent SELECTs, so we have to wait manually here
while [[ $(${CLICKHOUSE_CLIENT} -q "select count() from system.parts where database=currentDatabase() and table='forget_partition' and partition='20240101'") != 0 ]]; do sleep 0.1; done
${CLICKHOUSE_CLIENT} --multiline --multiquery -q """
set allow_unrestricted_reads_from_keeper=1;
select '---before---';
@ -31,3 +43,4 @@ select '---after---';
select name from system.zookeeper where path = '/test/02995/' || currentDatabase() || '/rmt/block_numbers' order by name;
drop table forget_partition;
"""

View File

@ -1,3 +1,5 @@
-- Tags: no-asan, no-tsan, no-msan, no-ubsan
DROP TABLE IF EXISTS test;
CREATE TABLE test (a UInt64, b UInt64, c UInt64) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 1, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 1;

View File

@ -0,0 +1,3 @@
0A01003C79A557B3C43400C4865AA84C3B4B01000650BC18F7DE0B00FAAF43E708213401008ED706EA0A9F13007228F915F5602C0100C692CA8FB81405003A6D357047EB1A01008416B7C3239EE3FF7BE9483CDC61DC01003E133A7C081AF5FFC1ECC583F7E5EA01000000000000000000000000000000000100C4865AA84C3BCBFF3B79A557B3C4B4010024C46EF500F1ECFFDB3B910AFF0ED301005E2FC14EBAEAE5FFA1D03EB14515DA
070109000000010600000001080000000103000000010500000001040000000107000000 AggregateFunction(groupArraySorted(10), Nullable(Decimal(3, 0)))
[3,4,5,6,7,8,9]

View File

@ -0,0 +1,6 @@
-- https://github.com/ClickHouse/ClickHouse/issues/61186
SELECT hex(CAST(unhex('0A01003C79A557B3C43400C4865AA84C3B4B01000650BC18F7DE0B00FAAF43E708213401008ED706EA0A9F13007228F915F5602C0100C692CA8FB81405003A6D357047EB1A01008416B7C3239EE3FF7BE9483CDC61DC01003E133A7C081AF5FFC1ECC583F7E5EA01000000000000000000000000000000000100C4865AA84C3BCBFF3B79A557B3C4B4010024C46EF500F1ECFFDB3B910AFF0ED301005E2FC14EBAEAE5FFA1D03EB14515DA'),
'AggregateFunction(groupArraySorted(10), Decimal(38, 38))'));
Select hex(groupArraySortedState(10)((number < 3 ? NULL : number)::Nullable(Decimal(3))) as t), toTypeName(t) from numbers(10);
Select finalizeAggregation(unhex('070109000000010600000001080000000103000000010500000001040000000107000000')::AggregateFunction(groupArraySorted(10), Nullable(Decimal(3, 0))));

View File

@ -0,0 +1,2 @@
CREATE DATABASE default
UNKNOWN_DATABASE

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
$CLICKHOUSE_LOCAL -q "show database default"
$CLICKHOUSE_LOCAL -q "show database default2" 2>&1 | grep -o 'UNKNOWN_DATABASE'

View File

@ -1,4 +1,4 @@
personal_ws-1.1 en 2724
personal_ws-1.1 en 2758
AArch
ACLs
ALTERs
@ -845,7 +845,6 @@ SendScalars
ShareAlike
SharedMergeTree
Shortkeys
Shortkeys
SimHash
Simhash
SimpleAggregateFunction
@ -1705,7 +1704,6 @@ hyperscan
hypot
hyvor
iTerm
iTerm
icosahedron
icudata
idempotency
@ -2332,6 +2330,14 @@ shortcircuit
shortkeys
shoutout
simdjson
simpleJSON
simpleJSONExtractBool
simpleJSONExtractFloat
simpleJSONExtractInt
simpleJSONExtractRaw
simpleJSONExtractString
simpleJSONExtractUInt
simpleJSONHas
simpleLinearRegression
simpleaggregatefunction
simplelinearregression