Mirror of https://github.com/ClickHouse/ClickHouse.git

Commit a316b70917 — Merge branch 'master' into hanfei/keeper_soft_limit
@@ -385,9 +385,25 @@ endif ()
include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake")

+# These files need to be installed so that users can use the well-known protobuf types
+set(google_proto_files
+    ${protobuf_source_dir}/src/google/protobuf/any.proto
+    ${protobuf_source_dir}/src/google/protobuf/api.proto
+    ${protobuf_source_dir}/src/google/protobuf/descriptor.proto
+    ${protobuf_source_dir}/src/google/protobuf/duration.proto
+    ${protobuf_source_dir}/src/google/protobuf/empty.proto
+    ${protobuf_source_dir}/src/google/protobuf/field_mask.proto
+    ${protobuf_source_dir}/src/google/protobuf/source_context.proto
+    ${protobuf_source_dir}/src/google/protobuf/struct.proto
+    ${protobuf_source_dir}/src/google/protobuf/timestamp.proto
+    ${protobuf_source_dir}/src/google/protobuf/type.proto
+    ${protobuf_source_dir}/src/google/protobuf/wrappers.proto
+)
+
add_library(_protobuf INTERFACE)
target_link_libraries(_protobuf INTERFACE _libprotobuf)
target_include_directories(_protobuf INTERFACE "${Protobuf_INCLUDE_DIR}")
+set_target_properties(_protobuf PROPERTIES google_proto_files "${google_proto_files}")
add_library(ch_contrib::protobuf ALIAS _protobuf)

add_library(_protoc INTERFACE)
@@ -56,7 +56,7 @@ Functions:

## Related content

-- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/)
+- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer)
- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf)
- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema)
- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
@@ -2533,13 +2533,14 @@ formatDateTime(Time, Format[, Timezone])
Returns time and date values according to the determined format.

**Replacement fields**

Using replacement fields, you can define a pattern for the resulting string. The “Example” column shows the formatting result for `2018-01-02 22:33:44`.

-| Placeholder | Description | Example |
+| Placeholder | Description | Example |
|----------|---------------------------------------------------------|------------|
| %a | abbreviated weekday name (Mon-Sun) | Mon |
| %b | abbreviated month name (Jan-Dec) | Jan |
-| %c | month as an integer number (01-12) | 01 |
+| %c | month as an integer number (01-12), see 'Note 3' below | 01 |
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |

@@ -2553,8 +2554,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %i | minute (00-59) | 33 |
| %I | hour in 12h format (01-12) | 10 |
| %j | day of the year (001-366) | 002 |
-| %k | hour in 24h format (00-23) | 22 |
-| %l | hour in 12h format (01-12) | 09 |
+| %k | hour in 24h format (00-23), see 'Note 3' below | 14 |
+| %l | hour in 12h format (01-12), see 'Note 3' below | 09 |
| %m | month as an integer number (01-12) | 01 |
| %M | full month name (January-December), see 'Note 2' below | January |
| %n | new-line character (‘’) | |

@@ -2579,6 +2580,8 @@ Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0)

Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`.

+Note 3: In ClickHouse versions earlier than v23.11, function `parseDateTime()` required leading zeros for formatters `%c` (month) and `%l`/`%k` (hour), e.g. `07`. In later versions, the leading zero may be omitted, e.g. `7`. The previous behavior can be restored using setting `parsedatetime_parse_without_leading_zeros = 0`. Note that function `formatDateTime()` by default still prints leading zeros for `%c` and `%l`/`%k` to not break existing use cases. This behavior can be changed by setting `formatdatetime_format_without_leading_zeros = 1`.
+
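Before the documented example, a hedged illustration of Notes 2 and 3 (a sketch assuming a v23.11+ server with default settings; exact output depends on version and settings):

``` sql
-- %c and %k accept single digits when parsing, since
-- parsedatetime_parse_without_leading_zeros = 1 by default from v23.11 on.
SELECT parseDateTime('2023-7-9 5:03:04', '%Y-%c-%d %k:%i:%s');
-- 2023-07-09 05:03:04

-- formatDateTime keeps leading zeros unless explicitly disabled.
SELECT formatDateTime(toDateTime('2023-07-09 05:03:04'), '%c/%k')
SETTINGS formatdatetime_format_without_leading_zeros = 1;
-- 7/5
```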
**Example**

``` sql
@@ -164,7 +164,7 @@ Consider a list of contacts that may specify multiple ways to contact a customer
└──────────┴──────┴───────────┴───────────┘
```

-The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32`, so it needs to be converted to `String`.
+The `mail` and `phone` fields are of type String, but the `telegram` field is `UInt32`, so it needs to be converted to `String`.

Get the first available contact method for the customer from the contact list:
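A hedged sketch of such a query (the diff truncates the documentation's own example; the table name `aBook` and the use of `coalesce` are assumptions based on the surrounding text):

``` sql
-- Assumed follow-on example: telegram is UInt32, so it is cast to
-- Nullable(String) before being passed to coalesce with the String columns.
SELECT name, coalesce(mail, phone, CAST(telegram, 'Nullable(String)')) AS contact
FROM aBook;
```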
@@ -67,7 +67,45 @@ WHERE macro = 'test';
│ test │ Value │
└───────┴──────────────┘
```

## getClientHTTPHeader

Returns the value of the specified HTTP header. If there is no such header or the request method is not HTTP, the function throws an exception.

**Syntax**

```sql
getClientHTTPHeader(name);
```

**Arguments**

- `name` — HTTP header name. [String](../../sql-reference/data-types/string.md#string)

**Returned value**

Value of the specified header.
Type: [String](../../sql-reference/data-types/string.md#string).

When this function is executed via `clickhouse-client`, it always returns an empty string, because the client does not use the HTTP protocol.
```sql
SELECT getClientHTTPHeader('test')
```

Result:

```text
┌─getClientHTTPHeader('test')─┐
│                             │
└─────────────────────────────┘
```

Try it with an HTTP request:

```shell
echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @-

# result
default
```
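The failure modes described above can also be exercised. A hedged sketch (the error behavior follows the description; exact messages are assumptions, not verbatim server output):

```sql
-- Throws if the HTTP request did not send this header.
SELECT getClientHTTPHeader('X-Missing-Header');

-- Throws if the header is listed in get_client_http_header_forbidden_headers
-- in the server configuration.
SELECT getClientHTTPHeader('Authorization');
```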

## FQDN

Returns the fully qualified domain name of the ClickHouse server.
docs/en/sql-reference/table-functions/fuzzJSON.md (new file, 86 lines)
@@ -0,0 +1,86 @@
---
slug: /en/sql-reference/table-functions/fuzzJSON
sidebar_position: 75
sidebar_label: fuzzJSON
---

# fuzzJSON

Perturbs a JSON string with random variations.

``` sql
fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
```

**Arguments**

- `named_collection` - A [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md).
- `option=value` - Named collection optional parameters and their values.
- `json_str` (String) - The source string representing structured data in JSON format.
- `random_seed` (UInt64) - Manual random seed for producing stable results.
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
- `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within the [0, 1] range.
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
- `max_array_size` (UInt64) - The maximum allowed size of a JSON array.
- `max_object_size` (UInt64) - The maximum allowed number of fields on a single level of a JSON object.
- `max_string_value_length` (UInt64) - The maximum length of a String value.
- `min_key_length` (UInt64) - The minimum key length. Must be at least 1.
- `max_key_length` (UInt64) - The maximum key length. Must be greater than or equal to `min_key_length`, if specified.

**Returned Value**

A table object with a single column containing perturbed JSON strings.

## Usage Example

``` sql
CREATE NAMED COLLECTION json_fuzzer AS json_str='{}';
SELECT * FROM fuzzJSON(json_fuzzer) LIMIT 3;
```

``` text
{"52Xz2Zd4vKNcuP2":true}
{"UPbOhOQAdPKIg91":3405264103600403024}
{"X0QUWu8yT":[]}
```

``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"name" : "value"}', random_seed=1234) LIMIT 3;
```

``` text
{"key":"value", "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRE3":true}
{"key":"value", "SWzJdEJZ04nrpSfy":[{"3Q23y":[]}]}
```

``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', reuse_output=true) LIMIT 3;
```

``` text
{"students":["Alice", "Bob"], "nwALnRMc4pyKD9Krv":[]}
{"students":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}]}
{"xeEk":["1rNY5ZNs0wU&82t_P", "Bob"], "wLNRGzwDiMKdw":[{}, {}]}
```

``` sql
SELECT * FROM fuzzJSON(json_fuzzer, json_str='{"students" : ["Alice", "Bob"]}', max_output_length=512) LIMIT 3;
```

``` text
{"students":["Alice", "Bob"], "BREhhXj5":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true}
{"NyEsSWzJdeJZ04s":["Alice", 5737924650575683711, 5346334167565345826], "BjVO2X9L":true, "k1SXzbSIz":[{}]}
```

``` sql
SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
```

``` text
{"id":1, "mxPG0h1R5":"L-YQLv@9hcZbOIGrAn10%GA"}
{"BRjE":16137826149911306846}
{"XjKE":15076727133550123563}
```
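A further hedged sketch combining several of the documented options (assuming they can be passed inline alongside the named collection, just as `json_str` and `random_seed` are above; the output is random, so no sample rows are shown):

``` sql
SELECT *
FROM fuzzJSON(json_fuzzer, json_str='{"name" : "value"}', max_nesting_level=2, max_object_size=3, probability=0.25)
LIMIT 3;
```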
@@ -44,6 +44,8 @@ contents:
    dst: /usr/bin/clickhouse-odbc-bridge
  - src: root/usr/share/bash-completion/completions
    dst: /usr/share/bash-completion/completions
+ - src: root/usr/share/clickhouse
+   dst: /usr/share/clickhouse
# docs
- src: ../AUTHORS
  dst: /usr/share/doc/clickhouse-common-static/AUTHORS
@ -457,3 +457,10 @@ endif()
|
||||
if (ENABLE_FUZZING)
|
||||
add_compile_definitions(FUZZING_MODE=1)
|
||||
endif ()
|
||||
|
||||
if (TARGET ch_contrib::protobuf)
|
||||
get_property(google_proto_files TARGET ch_contrib::protobuf PROPERTY google_proto_files)
|
||||
foreach (proto_file IN LISTS google_proto_files)
|
||||
install(FILES ${proto_file} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse/protos/google/protobuf)
|
||||
endforeach()
|
||||
endif ()
|
||||
|
@@ -306,6 +306,10 @@ void Client::initialize(Poco::Util::Application & self)
    /// Set path for format schema files
    if (config().has("format_schema_path"))
        global_context->setFormatSchemaPath(fs::weakly_canonical(config().getString("format_schema_path")));
+
+    /// Set the path for google proto files
+    if (config().has("google_protos_path"))
+        global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
}
@@ -37,7 +37,7 @@
    <production>{display_name} \e[1;31m:)\e[0m </production> <!-- if it matched to the substring "production" in the server display name -->
</prompt_by_server_display_name>

-<!--
+<!--
    Settings adjustable via command-line parameters
    can take their defaults from that config file, see examples:

@@ -58,6 +58,9 @@
    The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
-->

+<!-- Directory containing the proto files for the well-known Protobuf types.
+-->
+<google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>

<!-- Analog of .netrc -->
<![CDATA[
@@ -1279,6 +1279,8 @@ try
    global_context->setHTTPHeaderFilter(*config);

    global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
+    global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers);
+    global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header);
    global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);

    ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;

@@ -1575,6 +1577,10 @@ try
    global_context->setFormatSchemaPath(format_schema_path);
    fs::create_directories(format_schema_path);

+    /// Set the path for google proto files
+    if (config().has("google_protos_path"))
+        global_context->setGoogleProtosPath(fs::weakly_canonical(config().getString("google_protos_path")));
+
    /// Set path for filesystem caches
    fs::path filesystem_caches_path(config().getString("filesystem_caches_path", ""));
    if (!filesystem_caches_path.empty())
@@ -3,6 +3,7 @@
    <tmp_path replace="replace">./tmp/</tmp_path>
    <user_files_path replace="replace">./user_files/</user_files_path>
    <format_schema_path replace="replace">./format_schemas/</format_schema_path>
+    <google_protos_path replace="replace">../../contrib/google-protobuf/src/</google_protos_path>
    <access_control_path replace="replace">./access/</access_control_path>
    <top_level_domains_path replace="replace">./top_level_domains/</top_level_domains_path>
</clickhouse>
@@ -1428,6 +1428,10 @@
    -->
    <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>

+    <!-- Directory containing the proto files for the well-known Protobuf types.
+    -->
+    <google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>
+
    <!-- Default query masking rules, matching lines would be replaced with something else in the logs
        (both text logs and system.query_log).
        name - name for the rule (optional)
@@ -77,7 +77,7 @@ public:
        if (if_argument_pos >= 0)
        {
            const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
-            data(place).count += countBytesInFilter(flags);
+            data(place).count += countBytesInFilter(flags.data(), row_begin, row_end);
        }
        else
        {
@@ -142,6 +142,7 @@ struct AggregateFunctionSumData
    ), addManyConditionalInternalImpl, MULTITARGET_FUNCTION_BODY((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
    {
        ptr += start;
        condition_map += start;
        size_t count = end - start;
        const auto * end_ptr = ptr + count;
@@ -289,15 +289,6 @@ public:
        Arena * arena,
        ssize_t if_argument_pos = -1) const = 0;

-    virtual void addBatchSinglePlaceFromInterval( /// NOLINT
-        size_t row_begin,
-        size_t row_end,
-        AggregateDataPtr __restrict place,
-        const IColumn ** columns,
-        Arena * arena,
-        ssize_t if_argument_pos = -1)
-        const = 0;
-
    /** In addition to addBatch, this method collects multiple rows of arguments into array "places"
     * as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
     * -Array combinator. It might also be used generally to break data dependency when array

@@ -586,31 +577,6 @@ public:
        }
    }

-    void addBatchSinglePlaceFromInterval( /// NOLINT
-        size_t row_begin,
-        size_t row_end,
-        AggregateDataPtr __restrict place,
-        const IColumn ** columns,
-        Arena * arena,
-        ssize_t if_argument_pos = -1)
-        const override
-    {
-        if (if_argument_pos >= 0)
-        {
-            const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
-            for (size_t i = row_begin; i < row_end; ++i)
-            {
-                if (flags[i])
-                    static_cast<const Derived *>(this)->add(place, columns, i, arena);
-            }
-        }
-        else
-        {
-            for (size_t i = row_begin; i < row_end; ++i)
-                static_cast<const Derived *>(this)->add(place, columns, i, arena);
-        }
-    }
-
    void addBatchArray(
        size_t row_begin,
        size_t row_end,
@ -1,134 +0,0 @@
|
||||
#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/LambdaNode.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class AnyFunctionViMoveFunctionsOutOfAnyVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>
|
||||
{
|
||||
public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionViMoveFunctionsOutOfAnyVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void enterImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_move_functions_out_of_any)
|
||||
return;
|
||||
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node)
|
||||
return;
|
||||
|
||||
/// check function is any
|
||||
const auto & function_name = function_node->getFunctionName();
|
||||
if (function_name != "any" && function_name != "anyLast")
|
||||
return;
|
||||
|
||||
auto & arguments = function_node->getArguments().getNodes();
|
||||
if (arguments.size() != 1)
|
||||
return;
|
||||
|
||||
auto * inside_function_node = arguments[0]->as<FunctionNode>();
|
||||
|
||||
/// check argument is a function
|
||||
if (!inside_function_node)
|
||||
return;
|
||||
|
||||
/// check arguments can not contain arrayJoin or lambda
|
||||
if (!canRewrite(inside_function_node))
|
||||
return;
|
||||
|
||||
auto & inside_function_node_arguments = inside_function_node->getArguments().getNodes();
|
||||
|
||||
/// case any(f())
|
||||
if (inside_function_node_arguments.empty())
|
||||
return;
|
||||
|
||||
auto it = node_to_rewritten_node.find(node.get());
|
||||
if (it != node_to_rewritten_node.end())
|
||||
{
|
||||
node = it->second;
|
||||
return;
|
||||
}
|
||||
|
||||
/// checking done, rewrite function
|
||||
bool changed_argument = false;
|
||||
for (auto & inside_argument : inside_function_node_arguments)
|
||||
{
|
||||
if (inside_argument->as<ConstantNode>()) /// skip constant node
|
||||
break;
|
||||
|
||||
AggregateFunctionProperties properties;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);
|
||||
|
||||
auto any_function = std::make_shared<FunctionNode>(function_name);
|
||||
any_function->resolveAsAggregateFunction(std::move(aggregate_function));
|
||||
|
||||
auto & any_function_arguments = any_function->getArguments().getNodes();
|
||||
any_function_arguments.push_back(std::move(inside_argument));
|
||||
|
||||
inside_argument = std::move(any_function);
|
||||
changed_argument = true;
|
||||
}
|
||||
|
||||
if (changed_argument)
|
||||
{
|
||||
node_to_rewritten_node.emplace(node.get(), arguments[0]);
|
||||
node = arguments[0];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool canRewrite(const FunctionNode * function_node)
|
||||
{
|
||||
for (const auto & argument : function_node->getArguments().getNodes())
|
||||
{
|
||||
if (argument->as<LambdaNode>())
|
||||
return false;
|
||||
|
||||
if (const auto * inside_function = argument->as<FunctionNode>())
|
||||
{
|
||||
/// Function arrayJoin is special and should be skipped (think about it as
|
||||
/// an aggregate function), otherwise wrong result will be produced.
|
||||
/// For example:
|
||||
/// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
|
||||
/// ┌─number─┬─arrayJoin(array(array(), array()))─┐
|
||||
/// │ 0 │ [] │
|
||||
/// │ 0 │ [] │
|
||||
/// └────────┴────────────────────────────────────┘
|
||||
if (inside_function->getFunctionName() == "arrayJoin")
|
||||
return false;
|
||||
|
||||
if (!canRewrite(inside_function))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// After query analysis, alias identifier will be resolved to node whose memory address is same with the original one.
|
||||
/// So we can reuse the rewritten function.
|
||||
std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr> node_to_rewritten_node;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void MoveFunctionsOutOfAnyPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
AnyFunctionViMoveFunctionsOutOfAnyVisitor visitor(context);
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
}
|
@@ -1,27 +0,0 @@
#pragma once

#include <Analyzer/IQueryTreePass.h>

namespace DB
{

/** Rewrite 'any' and 'anyLast' functions pushing them inside original function.
 *
 * Example: SELECT any(f(x, y, g(z)));
 * Result: SELECT f(any(x), any(y), g(any(z)));
 */
class MoveFunctionsOutOfAnyPass final : public IQueryTreePass
{
public:
    String getName() override { return "MoveFunctionsOutOfAnyPass"; }

    String getDescription() override
    {
        return "Rewrite 'any' and 'anyLast' functions pushing them inside original function.";
    }

    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};

}
@@ -44,7 +44,6 @@
#include <Analyzer/Passes/CrossToInnerJoinPass.h>
#include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
#include <Analyzer/Passes/ConvertQueryToCNFPass.h>
-#include <Analyzer/Passes/MoveFunctionsOutOfAnyPass.h>
#include <Analyzer/Passes/OptimizeDateOrDateTimeConverterWithPreimagePass.h>


@@ -284,7 +283,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
    manager.addPass(std::make_unique<CrossToInnerJoinPass>());
    manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());

-    manager.addPass(std::make_unique<MoveFunctionsOutOfAnyPass>());
    manager.addPass(std::make_unique<OptimizeDateOrDateTimeConverterWithPreimagePass>());
}
@@ -98,6 +98,8 @@ namespace DB
    M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
    M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
    M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
+    M(String, get_client_http_header_forbidden_headers, "", "Comma separated list of http header names that will not be returned by function getClientHTTPHeader.", 0) \
+    M(Bool, allow_get_client_http_header, false, "Allow function getClientHTTPHeader", 0) \
    M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
    M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
@@ -512,8 +512,10 @@ class IColumn;
    M(Bool, splitby_max_substrings_includes_remaining_string, false, "Functions 'splitBy*()' with 'max_substrings' argument > 0 include the remaining string as last element in the result", 0) \
    \
    M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
-    M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
-    M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' produces the month name instead of minutes.", 0) \
+    M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' prints a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
+    M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' print/parse the month name instead of minutes.", 0) \
+    M(Bool, parsedatetime_parse_without_leading_zeros, true, "Formatters '%c', '%l' and '%k' in function 'parseDateTime()' parse months and hours without leading zeros.", 0) \
+    M(Bool, formatdatetime_format_without_leading_zeros, false, "Formatters '%c', '%l' and '%k' in function 'formatDateTime()' print months and hours without leading zeros.", 0) \
    \
    M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \
    M(Bool, throw_on_max_partitions_per_insert_block, true, "Used with max_partitions_per_insert_block. If true (default), an exception will be thrown when max_partitions_per_insert_block is reached. If false, details of the insert query reaching this limit with the number of partitions will be logged. This can be useful if you're trying to understand the impact on users when changing max_partitions_per_insert_block.", 0) \

@@ -554,7 +556,6 @@ class IColumn;
    M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
    M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \
    M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \
-    M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
    M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
    M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
    M(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, "Rewrite countDistinctIf with count_distinct_implementation configuration", 0) \

@@ -893,6 +894,7 @@ class IColumn;
    MAKE_OBSOLETE(M, UInt64, parallel_replicas_min_number_of_granules_to_enable, 0) \
    MAKE_OBSOLETE(M, Bool, query_plan_optimize_projection, true) \
    MAKE_OBSOLETE(M, Bool, query_cache_store_results_of_queries_with_nondeterministic_functions, false) \
+    MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \

    /** The section above is for obsolete settings. Do not add anything there. */
@ -124,6 +124,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
|
||||
{"23.4", {{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}}},
|
||||
{"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
|
||||
{"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
|
||||
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
|
||||
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
|
||||
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
|
||||
|
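The history entries above feed the `compatibility` setting; a hedged SQL sketch of restoring the pre-23.11 parsing behavior (a sketch, not part of the commit):

```sql
-- Either flip the single setting back...
SET parsedatetime_parse_without_leading_zeros = 0;
-- ...or pin the whole settings profile to an earlier release.
SET compatibility = '23.10';
```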
@@ -151,6 +151,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.protobuf.output_nullables_with_google_wrappers = settings.output_format_protobuf_nullables_with_google_wrappers;
    format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference;
    format_settings.protobuf.use_autogenerated_schema = settings.format_protobuf_use_autogenerated_schema;
+    format_settings.protobuf.google_protos_path = context->getGoogleProtosPath();
    format_settings.regexp.escaping_rule = settings.format_regexp_escaping_rule;
    format_settings.regexp.regexp = settings.format_regexp;
    format_settings.regexp.skip_unmatched = settings.format_regexp_skip_unmatched;
@@ -295,6 +295,7 @@ struct FormatSettings
        bool allow_multiple_rows_without_delimiter = false;
        bool skip_fields_with_unsupported_types_in_schema_inference = false;
        bool use_autogenerated_schema = true;
+        std::string google_protos_path;
    } protobuf;

    struct
@@ -30,11 +30,11 @@ void ProtobufSchemas::clear()
class ProtobufSchemas::ImporterWithSourceTree : public google::protobuf::compiler::MultiFileErrorCollector
{
public:
-    explicit ImporterWithSourceTree(const String & schema_directory, WithEnvelope with_envelope_)
-        : importer(&disk_source_tree, this)
-        , with_envelope(with_envelope_)
+    explicit ImporterWithSourceTree(const String & schema_directory, const String & google_protos_path, WithEnvelope with_envelope_)
+        : importer(&disk_source_tree, this), with_envelope(with_envelope_)
    {
        disk_source_tree.MapPath("", schema_directory);
+        disk_source_tree.MapPath("", google_protos_path);
    }

    ~ImporterWithSourceTree() override = default;

@@ -112,12 +112,17 @@ private:
};


-const google::protobuf::Descriptor * ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope)
+const google::protobuf::Descriptor *
+ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path)
{
    std::lock_guard lock(mutex);
    auto it = importers.find(info.schemaDirectory());
    if (it == importers.end())
-        it = importers.emplace(info.schemaDirectory(), std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), with_envelope)).first;
+        it = importers
+                 .emplace(
+                     info.schemaDirectory(),
+                     std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
+                 .first;
    auto * importer = it->second.get();
    return importer->import(info.schemaPath(), info.messageName());
}
@@ -59,7 +59,8 @@ public:

    /// Parses the format schema, then parses the corresponding proto file, and returns the descriptor of the message type.
    /// The function never returns nullptr, it throws an exception if it cannot load or parse the file.
-    const google::protobuf::Descriptor * getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope);
+    const google::protobuf::Descriptor *
+    getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path);

private:
    class ImporterWithSourceTree;
@@ -195,7 +195,11 @@ struct ArrayElementNumImpl

            if (index < array_size)
            {
-                size_t j = !negative ? (current_offset + index) : (offsets[i] - index - 1);
+                size_t j;
+                if constexpr (negative)
+                    j = offsets[i] - index - 1;
+                else
+                    j = current_offset + index;
                result[i] = data[j];
                if (builder)
                    builder.update(j);

@@ -260,7 +264,7 @@ struct ArrayElementNumImpl

struct ArrayElementStringImpl
{
-    template <bool negative>
+    template <bool negative, bool used_builder>
    static void vectorConst(
        const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets,
        const ColumnArray::Offset index,

@@ -269,21 +273,31 @@ struct ArrayElementStringImpl
    {
        size_t size = offsets.size();
        result_offsets.resize(size);
-        result_data.reserve(data.size());

        ColumnArray::Offset current_offset = 0;
-        ColumnArray::Offset current_result_offset = 0;
+        /// get the total result bytes at first, and reduce the cost of result_data.resize.
+        size_t total_result_bytes = 0;
+        ColumnString::Chars zero_buf(1);
+        zero_buf.push_back(0);
+        std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
+        selected_bufs.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            size_t array_size = offsets[i] - current_offset;

            if (index < array_size)
            {
-                size_t adjusted_index = !negative ? index : (array_size - index - 1);
+                size_t adjusted_index;
+                if constexpr (negative)
+                    adjusted_index = array_size - index - 1;
+                else
+                    adjusted_index = index;

-                size_t j = current_offset + adjusted_index;
-                if (builder)
+                if constexpr (used_builder)
+                {
+                    size_t j = current_offset + adjusted_index;
                    builder.update(j);
+                }

                ColumnArray::Offset string_pos = current_offset == 0 && adjusted_index == 0
                    ? 0

@@ -291,30 +305,36 @@ struct ArrayElementStringImpl

                ColumnArray::Offset string_size = string_offsets[current_offset + adjusted_index] - string_pos;

-                result_data.resize(current_result_offset + string_size);
-                memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size);
-                current_result_offset += string_size;
-                result_offsets[i] = current_result_offset;
+                total_result_bytes += string_size;
+                selected_bufs.emplace_back(&data[string_pos], string_size);
+                result_offsets[i] = total_result_bytes;
            }
            else
            {
                /// Insert an empty row.
-                result_data.resize(current_result_offset + 1);
-                result_data[current_result_offset] = 0;
-                current_result_offset += 1;
-                result_offsets[i] = current_result_offset;
+                total_result_bytes += 1;
+                selected_bufs.emplace_back(zero_buf.data(), 1);
+                result_offsets[i] = total_result_bytes;

-                if (builder)
+                if constexpr (used_builder)
                    builder.update();
            }

            current_offset = offsets[i];
        }

+        ColumnArray::Offset current_result_offset = 0;
+        result_data.resize(total_result_bytes);
+        for (const auto & buf : selected_bufs)
+        {
+            memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], buf.first, buf.second);
+            current_result_offset += buf.second;
+        }
    }

    /** Implementation for non-constant index.
     */
-    template <typename TIndex>
+    template <typename TIndex, bool used_builder>
    static void vector(
        const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets,
        const PaddedPODArray<TIndex> & indices,

@@ -323,10 +343,14 @@ struct ArrayElementStringImpl
    {
        size_t size = offsets.size();
        result_offsets.resize(size);
-        result_data.reserve(data.size());

+        ColumnString::Chars zero_buf(1);
+        zero_buf.push_back(0);
        ColumnArray::Offset current_offset = 0;
-        ColumnArray::Offset current_result_offset = 0;
+        /// get the total result bytes at first, and reduce the cost of result_data.resize.
+        size_t total_result_bytes = 0;
+        std::vector<std::pair<const ColumnString::Char *, UInt64>> selected_bufs;
+        selected_bufs.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            size_t array_size = offsets[i] - current_offset;

@@ -342,35 +366,43 @@ struct ArrayElementStringImpl

            if (adjusted_index < array_size)
            {
-                size_t j = current_offset + adjusted_index;
-                if (builder)
+                if constexpr (used_builder)
+                {
+                    size_t j = current_offset + adjusted_index;
                    builder.update(j);
+                }

                ColumnArray::Offset string_pos = current_offset == 0 && adjusted_index == 0
                    ? 0
                    : string_offsets[current_offset + adjusted_index - 1];

                ColumnArray::Offset string_size = string_offsets[current_offset + adjusted_index] - string_pos;
+                total_result_bytes += string_size;
+                selected_bufs.emplace_back(&data[string_pos], string_size);

-                result_data.resize(current_result_offset + string_size);
-                memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], &data[string_pos], string_size);
-                current_result_offset += string_size;
-                result_offsets[i] = current_result_offset;
+                result_offsets[i] = total_result_bytes;
            }
            else
            {
                /// Insert empty string
-                result_data.resize(current_result_offset + 1);
-                result_data[current_result_offset] = 0;
-                current_result_offset += 1;
-                result_offsets[i] = current_result_offset;
+                total_result_bytes += 1;
+                selected_bufs.emplace_back(zero_buf.data(), 1);
+                result_offsets[i] = total_result_bytes;

-                if (builder)
+                if constexpr (used_builder)
                    builder.update();
            }

            current_offset = offsets[i];
        }

+        ColumnArray::Offset current_result_offset = 0;
+        result_data.resize(total_result_bytes);
+        for (const auto & buf : selected_bufs)
+        {
+            memcpySmallAllowReadWriteOverflow15(&result_data[current_result_offset], buf.first, buf.second);
+            current_result_offset += buf.second;
+        }
    }
};
@@ -542,23 +574,47 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument

    if (index.getType() == Field::Types::UInt64
        || (index.getType() == Field::Types::Int64 && index.get<Int64>() >= 0))
-        ArrayElementStringImpl::vectorConst<false>(
-            col_nested->getChars(),
-            col_array->getOffsets(),
-            col_nested->getOffsets(),
-            index.get<UInt64>() - 1,
-            col_res->getChars(),
-            col_res->getOffsets(),
-            builder);
+    {
+        if (builder)
+            ArrayElementStringImpl::vectorConst<false, true>(
+                col_nested->getChars(),
+                col_array->getOffsets(),
+                col_nested->getOffsets(),
+                index.get<UInt64>() - 1,
+                col_res->getChars(),
+                col_res->getOffsets(),
+                builder);
+        else
+            ArrayElementStringImpl::vectorConst<false, false>(
+                col_nested->getChars(),
+                col_array->getOffsets(),
+                col_nested->getOffsets(),
+                index.get<UInt64>() - 1,
+                col_res->getChars(),
+                col_res->getOffsets(),
+                builder);
+    }
    else if (index.getType() == Field::Types::Int64)
-        ArrayElementStringImpl::vectorConst<true>(
-            col_nested->getChars(),
-            col_array->getOffsets(),
-            col_nested->getOffsets(),
-            -(UInt64(index.get<Int64>()) + 1),
-            col_res->getChars(),
-            col_res->getOffsets(),
-            builder);
+    {
+        if (builder)
+            ArrayElementStringImpl::vectorConst<true, true>(
+                col_nested->getChars(),
+                col_array->getOffsets(),
+                col_nested->getOffsets(),
+                -(UInt64(index.get<Int64>()) + 1),
+                col_res->getChars(),
+                col_res->getOffsets(),
+                builder);
+        else
+            ArrayElementStringImpl::vectorConst<true, false>(
+                col_nested->getChars(),
+                col_array->getOffsets(),
+                col_nested->getOffsets(),
+                -(UInt64(index.get<Int64>()) + 1),
+                col_res->getChars(),
+                col_res->getOffsets(),
+                builder);
+    }
    else
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index");

@@ -580,14 +636,25 @@ ColumnPtr FunctionArrayElement::executeString(
        return nullptr;

    auto col_res = ColumnString::create();
-    ArrayElementStringImpl::vector<IndexType>(
-        col_nested->getChars(),
-        col_array->getOffsets(),
-        col_nested->getOffsets(),
-        indices,
-        col_res->getChars(),
-        col_res->getOffsets(),
-        builder);
+
+    if (builder)
+        ArrayElementStringImpl::vector<IndexType, true>(
+            col_nested->getChars(),
+            col_array->getOffsets(),
+            col_nested->getOffsets(),
+            indices,
+            col_res->getChars(),
+            col_res->getOffsets(),
+            builder);
+    else
+        ArrayElementStringImpl::vector<IndexType, false>(
+            col_nested->getChars(),
+            col_array->getOffsets(),
+            col_nested->getOffsets(),
+            indices,
+            col_res->getChars(),
+            col_res->getOffsets(),
+            builder);

    return col_res;
}
@@ -322,6 +322,18 @@ private:
        return writeNumber2(dest, ToMonthImpl::execute(source, timezone));
    }

+    size_t mysqlMonthWithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
+    {
+        auto month = ToMonthImpl::execute(source, timezone);
+        if (month < 10)
+        {
+            dest[0] = '0' + month;
+            return 1;
+        }
+        else
+            return writeNumber2(dest, month);
+    }
+
    static size_t monthOfYearText(char * dest, Time source, bool abbreviate, UInt64, UInt32, const DateLUTImpl & timezone)
    {
        auto month = ToMonthImpl::execute(source, timezone);

@@ -404,10 +416,36 @@ private:
        return writeNumber2(dest, ToHourImpl::execute(source, timezone));
    }

+    size_t mysqlHour24WithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
+    {
+        auto hour = ToHourImpl::execute(source, timezone);
+        if (hour < 10)
+        {
+            dest[0] = '0' + hour;
+            return 1;
+        }
+        else
+            return writeNumber2(dest, hour);
+    }
+
    size_t mysqlHour12(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
    {
-        auto x = ToHourImpl::execute(source, timezone);
-        return writeNumber2(dest, x == 0 ? 12 : (x > 12 ? x - 12 : x));
+        auto hour = ToHourImpl::execute(source, timezone);
+        hour = (hour == 0) ? 12 : (hour > 12 ? hour - 12 : hour);
+        return writeNumber2(dest, hour);
    }

+    size_t mysqlHour12WithoutLeadingZero(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
+    {
+        auto hour = ToHourImpl::execute(source, timezone);
+        hour = hour == 0 ? 12 : (hour > 12 ? hour - 12 : hour);
+        if (hour < 10)
+        {
+            dest[0] = '0' + hour;
+            return 1;
+        }
+        else
+            return writeNumber2(dest, hour);
+    }
+
    size_t mysqlMinute(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)

@@ -689,10 +727,11 @@ private:
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "'%' must not be the last character in the format string, use '%%' instead");
    }

-    static bool containsOnlyFixedWidthMySQLFormatters(std::string_view format, bool mysql_M_is_month_name)
+    static bool containsOnlyFixedWidthMySQLFormatters(std::string_view format, bool mysql_M_is_month_name, bool mysql_format_ckl_without_leading_zeros)
    {
        static constexpr std::array variable_width_formatter = {'W'};
        static constexpr std::array variable_width_formatter_M_is_month_name = {'W', 'M'};
+        static constexpr std::array variable_width_formatter_leading_zeros = {'c', 'l', 'k'};

        for (size_t i = 0; i < format.size(); ++i)
        {

@@ -708,6 +747,13 @@ private:
                        [&](char c){ return c == format[i + 1]; }))
                    return false;
            }
+            if (mysql_format_ckl_without_leading_zeros)
+            {
+                if (std::any_of(
+                        variable_width_formatter_leading_zeros.begin(), variable_width_formatter_leading_zeros.end(),
+                        [&](char c){ return c == format[i + 1]; }))
+                    return false;
+            }
            else
            {
                if (std::any_of(

@@ -727,6 +773,7 @@ private:

    const bool mysql_M_is_month_name;
    const bool mysql_f_prints_single_zero;
+    const bool mysql_format_ckl_without_leading_zeros;

public:
    static constexpr auto name = Name::name;

@@ -736,6 +783,7 @@ public:
    explicit FunctionFormatDateTimeImpl(ContextPtr context)
        : mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name)
        , mysql_f_prints_single_zero(context->getSettings().formatdatetime_f_prints_single_zero)
+        , mysql_format_ckl_without_leading_zeros(context->getSettings().formatdatetime_format_without_leading_zeros)
    {
    }

@@ -885,7 +933,7 @@ public:
    /// column rows are NOT populated with the template and left uninitialized. We run the normal instructions for formatters AND
    /// instructions that copy literal characters before/between/after formatters. As a result, each byte of each result row is
    /// written which is obviously slow.
-    bool mysql_with_only_fixed_length_formatters = (format_syntax == FormatSyntax::MySQL) ? containsOnlyFixedWidthMySQLFormatters(format, mysql_M_is_month_name) : false;
+    bool mysql_with_only_fixed_length_formatters = (format_syntax == FormatSyntax::MySQL) ? containsOnlyFixedWidthMySQLFormatters(format, mysql_M_is_month_name, mysql_format_ckl_without_leading_zeros) : false;

    using T = typename InstructionValueTypeMap<DataType>::InstructionValueType;
    std::vector<Instruction<T>> instructions;

@@ -1077,12 +1125,22 @@ public:
                break;
            }

-            // Month as an integer number (01-12)
+            // Month as an integer number:
+            // - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 1-12
+            // - otherwise: prints with leading zeros, i.e. 01-12
            case 'c':
            {
                Instruction<T> instruction;
-                instruction.setMysqlFunc(&Instruction<T>::mysqlMonth);
-                instructions.push_back(std::move(instruction));
+                if (mysql_format_ckl_without_leading_zeros)
+                {
+                    instruction.setMysqlFunc(&Instruction<T>::mysqlMonthWithoutLeadingZero);
+                    instructions.push_back(std::move(instruction));
+                }
+                else
+                {
+                    instruction.setMysqlFunc(&Instruction<T>::mysqlMonth);
+                    instructions.push_back(std::move(instruction));
+                }
                out_template += "00";
                break;
            }

@@ -1391,20 +1449,30 @@ public:
                break;
            }

-            // Hour in 24h format (00-23)
+            // Hour in 24h format:
+            // - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 0-23
+            // - otherwise: prints with leading zeros, i.e. 00-23
            case 'k':
            {
                static constexpr std::string_view val = "00";
-                add_time_instruction(&Instruction<T>::mysqlHour24, val);
+                if (mysql_format_ckl_without_leading_zeros)
+                    add_time_instruction(&Instruction<T>::mysqlHour24WithoutLeadingZero, val);
+                else
+                    add_time_instruction(&Instruction<T>::mysqlHour24, val);
                out_template += val;
                break;
            }

-            // Hour in 12h format (01-12)
+            // Hour in 12h format:
+            // - if formatdatetime_format_without_leading_zeros = true: prints without leading zero, i.e. 0-12
+            // - otherwise: prints with leading zeros, i.e. 00-12
            case 'l':
            {
                static constexpr std::string_view val = "12";
-                add_time_instruction(&Instruction<T>::mysqlHour12, val);
+                if (mysql_format_ckl_without_leading_zeros)
+                    add_time_instruction(&Instruction<T>::mysqlHour12WithoutLeadingZero, val);
+                else
+                    add_time_instruction(&Instruction<T>::mysqlHour12, val);
                out_template += val;
                break;
            }
src/Functions/getClientHTTPHeader.cpp (new file, 116 lines)
@@ -0,0 +1,116 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
#include "Disks/DiskType.h"
#include "Interpreters/Context_fwd.h"
#include <Core/Field.h>
#include <Poco/Net/NameValueCollection.h>


namespace DB
{
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
    extern const int FUNCTION_NOT_ALLOWED;
    extern const int BAD_ARGUMENTS;
}

namespace
{

/** Get the value of a parameter in HTTP headers.
  * If there is no such parameter or the method of the request is not
  * HTTP, the function will throw an exception.
  */
class FunctionGetClientHTTPHeader : public IFunction, WithContext
{
private:

public:
    explicit FunctionGetClientHTTPHeader(ContextPtr context_): WithContext(context_) {}

    static constexpr auto name = "getClientHTTPHeader";

    static FunctionPtr create(ContextPtr context_)
    {
        return std::make_shared<FunctionGetClientHTTPHeader>(context_);
    }

    bool useDefaultImplementationForConstants() const override { return true; }

    String getName() const override { return name; }

    bool isDeterministic() const override { return false; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    size_t getNumberOfArguments() const override
    {
        return 1;
    }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!getContext()->allowGetHTTPHeaderFunction())
            throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "The function {} is not enabled, you can set allow_get_client_http_header in config file.", getName());

        if (!isString(arguments[0]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must have String type", getName());
        return std::make_shared<DataTypeString>();
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        const auto & client_info = getContext()->getClientInfo();
        const auto & method = client_info.http_method;
        const auto & headers = client_info.headers;
        const IColumn * arg_column = arguments[0].column.get();
        const ColumnString * arg_string = checkAndGetColumn<ColumnString>(arg_column);

        if (!arg_string)
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must be a constant String", getName());

        if (method != ClientInfo::HTTPMethod::GET && method != ClientInfo::HTTPMethod::POST)
            return result_type->createColumnConstWithDefaultValue(input_rows_count);

        auto result_column = ColumnString::create();

        const String default_value;
        const std::unordered_set<String> & forbidden_header_list = getContext()->getClientHTTPHeaderForbiddenHeaders();

        for (size_t row = 0; row < input_rows_count; ++row)
        {
            auto header_name = arg_string->getDataAt(row).toString();

            if (!headers.has(header_name))
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} is not in HTTP request headers.", header_name);
            else
            {
                auto it = forbidden_header_list.find(header_name);
                if (it != forbidden_header_list.end())
                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "The header {} is in get_client_http_header_forbidden_headers, you can config it in config file.", header_name);

                const String & value = headers[header_name];
                result_column->insertData(value.data(), value.size());
            }
        }

        return result_column;
    }
};

}

REGISTER_FUNCTION(GetHttpHeader)
{
    factory.registerFunction<FunctionGetClientHTTPHeader>();
}

}
@@ -466,12 +466,14 @@ namespace
{
public:
    const bool mysql_M_is_month_name;
+    const bool mysql_parse_ckl_without_leading_zeros;

    static constexpr auto name = Name::name;
    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionParseDateTimeImpl>(context); }

    explicit FunctionParseDateTimeImpl(ContextPtr context)
        : mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name)
+        , mysql_parse_ckl_without_leading_zeros(context->getSettings().parsedatetime_parse_without_leading_zeros)
    {
    }
@ -835,6 +837,14 @@ namespace
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos mysqlMonthWithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
Int32 month;
|
||||
cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, month);
|
||||
date.setMonth(month);
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos mysqlCentury(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
Int32 century;
|
||||
@ -1131,6 +1141,14 @@ namespace
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos mysqlHour12WithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
Int32 hour;
|
||||
cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, hour);
|
||||
date.setHour(hour, true, true);
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos mysqlHour24(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
Int32 hour;
|
||||
@ -1139,6 +1157,14 @@ namespace
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos mysqlHour24WithoutLeadingZero(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
Int32 hour;
|
||||
cur = readNumberWithVariableLength(cur, end, false, false, false, 1, 2, fragment, hour);
|
||||
date.setHour(hour, false, false);
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos readNumberWithVariableLength(
|
||||
Pos cur,
|
||||
Pos end,
|
||||
@ -1490,9 +1516,14 @@ namespace
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthOfYearTextShort));
|
||||
break;
|
||||
|
||||
// Month as a decimal number (01-12)
|
||||
// Month as a decimal number:
|
||||
// - if parsedatetime_parse_without_leading_zeros = true: possibly without leading zero, i.e. 1-12
|
||||
// - else: with leading zero required, i.e. 01-12
|
||||
case 'c':
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth));
|
||||
if (mysql_parse_ckl_without_leading_zeros)
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthWithoutLeadingZero));
|
||||
else
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth));
|
||||
break;
|
||||
|
||||
// Year, divided by 100, zero-padded
|
||||
@ -1645,14 +1676,24 @@ namespace
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
|
||||
break;
|
||||
|
||||
// Hour in 24h format (00-23)
|
||||
// Hour in 24h format:
|
||||
// - if parsedatetime_parse_without_leading_zeros = true, possibly without leading zero: i.e. 0-23
|
||||
// - else with leading zero required: i.e. 00-23
|
||||
case 'k':
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24));
|
||||
if (mysql_parse_ckl_without_leading_zeros)
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24WithoutLeadingZero));
|
||||
else
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24));
|
||||
break;
|
||||
|
||||
// Hour in 12h format (01-12)
|
||||
// Hour in 12h format:
|
||||
// - if parsedatetime_parse_without_leading_zeros = true: possibly without leading zero, i.e. 0-12
|
||||
// - else with leading zero required: i.e. 00-12
|
||||
case 'l':
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
|
||||
if (mysql_parse_ckl_without_leading_zeros)
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12WithoutLeadingZero));
|
||||
else
|
||||
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12));
|
||||
break;
|
||||
|
||||
case 't':
|
||||
|
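
The new WithoutLeadingZero instructions rely on readNumberWithVariableLength accepting one or two digits instead of requiring exactly two. A rough sketch of that parsing contract, simplified and not the actual ClickHouse helper:

#include <cctype>
#include <stdexcept>

// Parse between min_digits and max_digits decimal digits starting at cur.
// Returns the new position; result receives the parsed value.
const char * readDigits(const char * cur, const char * end, int min_digits, int max_digits, int & result)
{
    int digits = 0;
    result = 0;
    while (cur != end && digits < max_digits && std::isdigit(static_cast<unsigned char>(*cur)))
    {
        result = result * 10 + (*cur - '0');
        ++cur;
        ++digits;
    }
    if (digits < min_digits)
        throw std::invalid_argument("not enough digits in fragment");
    return cur;
}

// With min_digits = 1 and max_digits = 2, both "7/" and "07/" parse month 7,
// which is what parsedatetime_parse_without_leading_zeros enables.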
@@ -649,7 +649,7 @@ Aws::String SSOCredentialsProvider::loadAccessTokenFile(const Aws::String & sso_
    }
    else
    {
-       LOG_TRACE(logger, "Unable to open token file on path: {}", sso_access_token_path);
+       LOG_TEST(logger, "Unable to open token file on path: {}", sso_access_token_path);
        return "";
    }
}
@@ -1446,18 +1446,15 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKey(
    for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst)
    {
        if (inst->offsets)
-           inst->batch_that->addBatchSinglePlaceFromInterval(
+           inst->batch_that->addBatchSinglePlace(
                inst->offsets[static_cast<ssize_t>(row_begin) - 1],
                inst->offsets[row_end - 1],
                res + inst->state_offset,
                inst->batch_arguments, data_variants.aggregates_pool);
        else
-           inst->batch_that->addBatchSinglePlaceFromInterval(
-               row_begin,
-               row_end,
-               res + inst->state_offset,
-               inst->batch_arguments,
-               data_variants.aggregates_pool);
+           inst->batch_that->addBatchSinglePlace(
+               row_begin, row_end, res + inst->state_offset, inst->batch_arguments, data_variants.aggregates_pool);
    }
}
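
With the FromInterval variant removed, one batched entry point now covers both whole-block and sub-interval aggregation. A conceptual sketch of the contract, not the real IAggregateFunction interface:

#include <cstddef>

// Accumulate rows [row_begin, row_end) of a column into one shared state.
template <typename State, typename Column, typename AddFn>
void addBatchSinglePlace(size_t row_begin, size_t row_end, State & state, const Column & column, AddFn && add)
{
    for (size_t i = row_begin; i < row_end; ++i)
        add(state, column[i]);
}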
@@ -206,10 +206,19 @@ Block ArrayJoinResultIterator::next()
    bool is_left = array_join->is_left;
+   auto cut_any_col = any_array->cut(current_row, next_row - current_row);
+   const auto * cut_any_array = typeid_cast<const ColumnArray *>(cut_any_col.get());

    for (size_t i = 0; i < num_columns; ++i)
    {
        ColumnWithTypeAndName current = block.safeGetByPosition(i);
-       current.column = current.column->cut(current_row, next_row - current_row);

+       /// Reuse cut_any_col if possible to avoid unnecessary cut.
+       if (!is_unaligned && !is_left && current.name == *columns.begin())
+       {
+           current.column = cut_any_col;
+           current.type = getArrayJoinDataType(current.type);
+       }
+       else
+           current.column = current.column->cut(current_row, next_row - current_row);

        if (columns.contains(current.name))
        {
@@ -2,6 +2,7 @@

#include <Core/UUID.h>
#include <Poco/Net/SocketAddress.h>
+#include <Poco/Net/NameValueCollection.h>
#include <base/types.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/VersionNumber.h>

@@ -96,6 +97,7 @@ public:

    /// For mysql and postgresql
    UInt64 connection_id = 0;
+   Poco::Net::NameValueCollection headers;

    /// Comma separated list of forwarded IP addresses (from X-Forwarded-For for HTTP interface).
    /// It's expected that proxy appends the forwarded address to the end of the list.
@@ -3,6 +3,7 @@
#include <optional>
#include <memory>
#include <Poco/UUID.h>
+#include <Poco/Net/NameValueCollection.h>
#include <Poco/Util/Application.h>
#include <Common/SensitiveDataMasker.h>
#include <Common/Macros.h>

@@ -322,9 +323,12 @@ struct ContextSharedPart : boost::noncopyable
    std::optional<MergeTreeSettings> merge_tree_settings TSA_GUARDED_BY(mutex);   /// Settings of MergeTree* engines.
    std::optional<MergeTreeSettings> replicated_merge_tree_settings TSA_GUARDED_BY(mutex);   /// Settings of ReplicatedMergeTree* engines.
    std::atomic_size_t max_table_size_to_drop = 50000000000lu;   /// Protects MergeTree tables from accidental DROP (50GB by default)
+   std::unordered_set<String> get_client_http_header_forbidden_headers;
+   bool allow_get_client_http_header;
    std::atomic_size_t max_partition_size_to_drop = 50000000000lu;   /// Protects MergeTree partitions from accidental DROP (50GB by default)
    /// No lock required for format_schema_path modified only during initialization
    String format_schema_path;   /// Path to a directory that contains schema files used by input formats.
+   String google_protos_path;   /// Path to a directory that contains the proto files for the well-known Protobuf types.
    mutable OnceFlag action_locks_manager_initialized;
    ActionLocksManagerPtr action_locks_manager;   /// Set of storages' action lockers
    OnceFlag system_logs_initialized;

@@ -3950,6 +3954,28 @@ void Context::checkTableCanBeDropped(const String & database, const String & tab
}


+void Context::setClientHTTPHeaderForbiddenHeaders(const String & forbidden_headers)
+{
+    std::unordered_set<String> forbidden_header_list;
+    boost::split(forbidden_header_list, forbidden_headers, [](char c) { return c == ','; });
+    shared->get_client_http_header_forbidden_headers = forbidden_header_list;
+}
+
+void Context::setAllowGetHTTPHeaderFunction(bool allow_get_http_header_function)
+{
+    shared->allow_get_client_http_header = allow_get_http_header_function;
+}
+
+const std::unordered_set<String> & Context::getClientHTTPHeaderForbiddenHeaders() const
+{
+    return shared->get_client_http_header_forbidden_headers;
+}
+
+bool Context::allowGetHTTPHeaderFunction() const
+{
+    return shared->allow_get_client_http_header;
+}
+
void Context::setMaxPartitionSizeToDrop(size_t max_size)
{
    // Is initialized at server startup and updated at config reload

@@ -4116,6 +4142,16 @@ void Context::setFormatSchemaPath(const String & path)
    shared->format_schema_path = path;
}

+String Context::getGoogleProtosPath() const
+{
+    return shared->google_protos_path;
+}
+
+void Context::setGoogleProtosPath(const String & path)
+{
+    shared->google_protos_path = path;
+}
+
Context::SampleBlockCache & Context::getSampleBlockCache() const
{
    assert(hasQueryContext());

@@ -4270,12 +4306,15 @@ void Context::setClientConnectionId(uint32_t connection_id_)
    client_info.connection_id = connection_id_;
}

-void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer)
+void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers)
{
    client_info.http_method = http_method;
    client_info.http_user_agent = http_user_agent;
    client_info.http_referer = http_referer;
    need_recalculate_access = true;
+
+   if (!http_headers.empty())
+       client_info.headers = http_headers;
}

void Context::setForwardedFor(const String & forwarded_for)
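
Context::setClientHTTPHeaderForbiddenHeaders above does all of its parsing up front. This standalone snippet reproduces the same boost::split call on a comma-separated config value; the example value is hypothetical:

#include <iostream>
#include <string>
#include <unordered_set>
#include <boost/algorithm/string.hpp>

int main()
{
    std::unordered_set<std::string> forbidden;
    // e.g. the value of the get_client_http_header_forbidden_headers server setting
    std::string config_value = "Authorization,Cookie,X-ClickHouse-Key";
    boost::split(forbidden, config_value, [](char c) { return c == ','; });
    std::cout << forbidden.count("Cookie") << '\n'; // prints 1
}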
@@ -26,6 +26,8 @@
#include <Server/HTTP/HTTPContext.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IStorage_fwd.h>
+#include <Poco/Net/NameValueCollection.h>
+#include <Core/Types.h>

#include "config.h"

@@ -640,7 +642,7 @@ public:
    void setClientInterface(ClientInfo::Interface interface);
    void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
    void setClientConnectionId(uint32_t connection_id);
-   void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer);
+   void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers = {});
    void setForwardedFor(const String & forwarded_for);
    void setQueryKind(ClientInfo::QueryKind query_kind);
    void setQueryKindInitial();

@@ -1073,6 +1075,11 @@ public:
    /// Prevents DROP TABLE if its size is greater than max_size (50GB by default, max_size=0 turn off this check)
    void setMaxTableSizeToDrop(size_t max_size);
    size_t getMaxTableSizeToDrop() const;
+   void setClientHTTPHeaderForbiddenHeaders(const String & forbidden_headers);
+   /// Return the forbidden headers that users can't get via the getClientHTTPHeader function
+   const std::unordered_set<String> & getClientHTTPHeaderForbiddenHeaders() const;
+   void setAllowGetHTTPHeaderFunction(bool allow_get_http_header_function);
+   bool allowGetHTTPHeaderFunction() const;
    void checkTableCanBeDropped(const String & database, const String & table, const size_t & table_size) const;

    /// Prevents DROP PARTITION if its size is greater than max_size (50GB by default, max_size=0 turn off this check)

@@ -1140,6 +1147,10 @@ public:
    String getFormatSchemaPath() const;
    void setFormatSchemaPath(const String & path);

+   /// Path to the folder containing the proto files for the well-known Protobuf types
+   String getGoogleProtosPath() const;
+   void setGoogleProtosPath(const String & path);
+
    SampleBlockCache & getSampleBlockCache() const;

    /// Query parameters for prepared statements.
@@ -690,9 +690,15 @@ void MutationsInterpreter::prepare(bool dry_run)
    {
        if (column.default_desc.kind == ColumnDefaultKind::Materialized)
        {
+           auto type_literal = std::make_shared<ASTLiteral>(column.type->getName());
+
+           auto materialized_column = makeASTFunction("_CAST",
+               column.default_desc.expression->clone(),
+               type_literal);
+
            stages.back().column_to_updated.emplace(
                column.name,
-               column.default_desc.expression->clone());
+               materialized_column);
        }
    }
}
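
// Illustrative effect of the change above (hypothetical column): for
//     c Float64 MATERIALIZED a + b
// the mutation now updates c with the equivalent of _CAST(a + b, 'Float64')
// instead of the bare expression a + b, so the recomputed value is forced
// into the column's declared type.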
@@ -1,124 +0,0 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSubquery.h>
#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Parsers/ASTTablesInSelectQuery.h>

namespace DB
{

namespace
{

bool extractIdentifiers(const ASTFunction & func, std::unordered_set<ASTPtr *> & identifiers)
{
    for (auto & arg : func.arguments->children)
    {
        if (const auto * arg_func = arg->as<ASTFunction>())
        {
            /// arrayJoin() is special and should not be optimized (think about
            /// it as an aggregate function), otherwise a wrong result will be
            /// produced:
            /// SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
            /// ┌─number─┬─arrayJoin(array(array(), array()))─┐
            /// │      0 │ []                                 │
            /// │      0 │ []                                 │
            /// └────────┴────────────────────────────────────┘
            /// While it should be:
            /// ┌─number─┬─any(arrayJoin(array(array(), array())))─┐
            /// │      0 │ []                                      │
            /// └────────┴─────────────────────────────────────────┘
            if (arg_func->name == "arrayJoin")
                return false;

            if (arg_func->name == "lambda")
                return false;

            // We are looking for identifiers inside a function calculated inside
            // the aggregate function `any()`. Window or aggregate function can't
            // be inside `any`, but this check in GetAggregatesMatcher happens
            // later, so we have to explicitly skip these nested functions here.
            if (arg_func->is_window_function
                || AggregateUtils::isAggregateFunction(*arg_func))
            {
                return false;
            }

            if (!extractIdentifiers(*arg_func, identifiers))
                return false;
        }
        else if (arg->as<ASTIdentifier>())
            identifiers.emplace(&arg);
    }

    return true;
}

}


void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data)
{
    if (auto * func = ast->as<ASTFunction>())
    {
        if (func->is_window_function)
            return;

        visit(*func, ast, data);
    }
}

void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data)
{
    if (!func.arguments || func.arguments->children.empty() || !func.arguments->children[0])
        return;

    if (func.name != "any" && func.name != "anyLast")
        return;

    auto & func_arguments = func.arguments->children;

    if (func_arguments.size() != 1)
        return;

    const auto * first_arg_func = func_arguments[0]->as<ASTFunction>();
    if (!first_arg_func || first_arg_func->arguments->children.empty())
        return;

    /// We have rewritten this function. Just unwrap its argument.
    if (data.rewritten.contains(ast.get()))
    {
        func_arguments[0]->setAlias(func.alias);
        ast = func_arguments[0];
        return;
    }

    std::unordered_set<ASTPtr *> identifiers; /// implicit remove duplicates
    if (!extractIdentifiers(func, identifiers))
        return;

    /// Wrap identifiers: any(f(x, y, g(z))) -> any(f(any(x), any(y), g(any(z))))
    for (auto * ast_to_change : identifiers)
    {
        ASTPtr identifier_ast = *ast_to_change;
        *ast_to_change = makeASTFunction(func.name);
        (*ast_to_change)->as<ASTFunction>()->arguments->children.emplace_back(identifier_ast);
    }

    data.rewritten.insert(ast.get());

    /// Unwrap function: any(f(any(x), any(y), g(any(z)))) -> f(any(x), any(y), g(any(z)))
    func_arguments[0]->setAlias(func.alias);
    ast = func_arguments[0];
}

bool RewriteAnyFunctionMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &)
{
    return !node->as<ASTSubquery>() &&
        !node->as<ASTTableExpression>() &&
        !node->as<ASTArrayJoin>();
}

}
@@ -1,29 +0,0 @@
#pragma once

#include <unordered_set>

#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>

namespace DB
{

class ASTFunction;

/// Rewrite 'any' and 'anyLast' functions pushing them inside original function.
/// any(f(x, y, g(z))) -> f(any(x), any(y), g(any(z)))
class RewriteAnyFunctionMatcher
{
public:
    struct Data
    {
        std::unordered_set<IAST *> rewritten;
    };

    static void visit(ASTPtr & ast, Data & data);
    static void visit(const ASTFunction &, ASTPtr & ast, Data & data);
    static bool needChildVisit(const ASTPtr & node, const ASTPtr & child);
};
using RewriteAnyFunctionVisitor = InDepthNodeVisitor<RewriteAnyFunctionMatcher, false>;

}
@@ -15,6 +15,7 @@
#include <Interpreters/Cluster.h>

#include <magic_enum.hpp>
+#include <Poco/Net/NameValueCollection.h>

#include <atomic>
#include <condition_variable>

@@ -431,7 +432,7 @@ void Session::setClientConnectionId(uint32_t connection_id)
    prepared_client_info->connection_id = connection_id;
}

-void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer)
+void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers)
{
    if (session_context)
    {

@@ -442,6 +443,7 @@ void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String
        prepared_client_info->http_method = http_method;
        prepared_client_info->http_user_agent = http_user_agent;
        prepared_client_info->http_referer = http_referer;
+       prepared_client_info->headers = http_headers;
    }
}
@@ -5,6 +5,7 @@
#include <Interpreters/ClientInfo.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/SessionTracker.h>
+#include <Poco/Net/NameValueCollection.h>

#include <chrono>
#include <memory>

@@ -64,7 +65,7 @@ public:
    void setClientInterface(ClientInfo::Interface interface);
    void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
    void setClientConnectionId(uint32_t connection_id);
-   void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer);
+   void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer, const Poco::Net::NameValueCollection & http_headers = {});
    void setForwardedFor(const String & forwarded_for);
    void setQuotaClientKey(const String & quota_key);
    void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
@@ -11,7 +11,6 @@
#include <Interpreters/DuplicateOrderByVisitor.h>
#include <Interpreters/GroupByFunctionKeysVisitor.h>
#include <Interpreters/AggregateFunctionOfGroupByKeysVisitor.h>
-#include <Interpreters/RewriteAnyFunctionVisitor.h>
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/FunctionMaskingArgumentCheckVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>

@@ -606,12 +605,6 @@ void optimizeAggregationFunctions(ASTPtr & query)
    ArithmeticOperationsInAgrFuncVisitor(data).visit(query);
}

-void optimizeAnyFunctions(ASTPtr & query)
-{
-    RewriteAnyFunctionVisitor::Data data = {};
-    RewriteAnyFunctionVisitor(data).visit(query);
-}
-
void optimizeSumIfFunctions(ASTPtr & query)
{
    RewriteSumIfFunctionVisitor::Data data = {};

@@ -764,10 +757,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
    if (settings.optimize_group_by_function_keys)
        optimizeGroupByFunctionKeys(select_query);

-    /// Move all operations out of any function
-    if (settings.optimize_move_functions_out_of_any)
-        optimizeAnyFunctions(query);
-
    if (settings.optimize_normalize_count_variants)
        optimizeCountConstantAndSumOne(query, context);
@@ -15,18 +15,20 @@ ProtobufListInputFormat::ProtobufListInputFormat(
    const Block & header_,
    const Params & params_,
    const ProtobufSchemaInfo & schema_info_,
-   bool flatten_google_wrappers_)
+   bool flatten_google_wrappers_,
+   const String & google_protos_path)
    : IRowInputFormat(header_, in_, params_)
    , reader(std::make_unique<ProtobufReader>(in_))
    , serializer(ProtobufSerializer::create(
-         header_.getNames(),
-         header_.getDataTypes(),
-         missing_column_indices,
-         *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes),
-         /* with_length_delimiter = */ true,
-         /* with_envelope = */ true,
-         flatten_google_wrappers_,
-         *reader))
+         header_.getNames(),
+         header_.getDataTypes(),
+         missing_column_indices,
+         *ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+             schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes, google_protos_path),
+         /* with_length_delimiter = */ true,
+         /* with_envelope = */ true,
+         flatten_google_wrappers_,
+         *reader))
{
}
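
// Context for the new argument: google_protos_path is the directory holding
// the proto files for the well-known Protobuf types (see
// Context::getGoogleProtosPath above) and comes from
// settings.protobuf.google_protos_path, so a user-supplied format schema can
// resolve an import such as "google/protobuf/timestamp.proto".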
@@ -83,33 +85,33 @@ size_t ProtobufListInputFormat::countRows(size_t max_block_size)

ProtobufListSchemaReader::ProtobufListSchemaReader(const FormatSettings & format_settings)
    : schema_info(
-         format_settings.schema.format_schema,
-         "Protobuf",
-         true,
-         format_settings.schema.is_server,
-         format_settings.schema.format_schema_path)
+         format_settings.schema.format_schema, "Protobuf", true, format_settings.schema.is_server, format_settings.schema.format_schema_path)
    , skip_unsopported_fields(format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference)
+   , google_protos_path(format_settings.protobuf.google_protos_path)
{
}

NamesAndTypesList ProtobufListSchemaReader::readSchema()
{
-   const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::Yes);
+   const auto * message_descriptor
+       = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::Yes, google_protos_path);
    return protobufSchemaToCHSchema(message_descriptor, skip_unsopported_fields);
}

void registerInputFormatProtobufList(FormatFactory & factory)
{
    factory.registerInputFormat(
        "ProtobufList",
-       [](ReadBuffer &buf,
-           const Block & sample,
-           RowInputFormatParams params,
-           const FormatSettings & settings)
-       {
-           return std::make_shared<ProtobufListInputFormat>(buf, sample, std::move(params),
-               ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema), settings.protobuf.input_flatten_google_wrappers);
-       });
+       [](ReadBuffer & buf, const Block & sample, RowInputFormatParams params, const FormatSettings & settings)
+       {
+           return std::make_shared<ProtobufListInputFormat>(
+               buf,
+               sample,
+               std::move(params),
+               ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema),
+               settings.protobuf.input_flatten_google_wrappers,
+               settings.protobuf.google_protos_path);
+       });
    factory.markFormatSupportsSubsetOfColumns("ProtobufList");
    factory.registerAdditionalInfoForSchemaCacheGetter(
        "ProtobufList",
@@ -29,7 +29,8 @@
    const Block & header_,
    const Params & params_,
    const ProtobufSchemaInfo & schema_info_,
-   bool flatten_google_wrappers_);
+   bool flatten_google_wrappers_,
+   const String & google_protos_path);

    String getName() const override { return "ProtobufListInputFormat"; }

@@ -56,6 +57,7 @@ public:
private:
    const FormatSchemaInfo schema_info;
    bool skip_unsopported_fields;
+   const String google_protos_path;
};

}
@@ -13,13 +13,15 @@ ProtobufListOutputFormat::ProtobufListOutputFormat(
    WriteBuffer & out_,
    const Block & header_,
    const ProtobufSchemaInfo & schema_info_,
-   bool defaults_for_nullable_google_wrappers_)
+   bool defaults_for_nullable_google_wrappers_,
+   const String & google_protos_path)
    : IRowOutputFormat(header_, out_)
    , writer(std::make_unique<ProtobufWriter>(out))
    , serializer(ProtobufSerializer::create(
          header_.getNames(),
          header_.getDataTypes(),
-         *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes),
+         *ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+             schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes, google_protos_path),
          /* with_length_delimiter = */ true,
          /* with_envelope = */ true,
          defaults_for_nullable_google_wrappers_,

@@ -49,13 +51,14 @@ void registerOutputFormatProtobufList(FormatFactory & factory)
{
    factory.registerOutputFormat(
        "ProtobufList",
-       [](WriteBuffer & buf,
-           const Block & header,
-           const FormatSettings & settings)
+       [](WriteBuffer & buf, const Block & header, const FormatSettings & settings)
        {
            return std::make_shared<ProtobufListOutputFormat>(
-               buf, header, ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema),
-               settings.protobuf.output_nullables_with_google_wrappers);
+               buf,
+               header,
+               ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema),
+               settings.protobuf.output_nullables_with_google_wrappers,
+               settings.protobuf.google_protos_path);
        });
}
@@ -27,7 +27,8 @@
    WriteBuffer & out_,
    const Block & header_,
    const ProtobufSchemaInfo & schema_info_,
-   bool defaults_for_nullable_google_wrappers_);
+   bool defaults_for_nullable_google_wrappers_,
+   const String & google_protos_path);

    String getName() const override { return "ProtobufListOutputFormat"; }
@@ -10,10 +10,17 @@
namespace DB
{

-ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_,
-   const ProtobufSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_)
+ProtobufRowInputFormat::ProtobufRowInputFormat(
+   ReadBuffer & in_,
+   const Block & header_,
+   const Params & params_,
+   const ProtobufSchemaInfo & schema_info_,
+   bool with_length_delimiter_,
+   bool flatten_google_wrappers_,
+   const String & google_protos_path)
    : IRowInputFormat(header_, in_, params_)
-   , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No))
+   , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+         schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No, google_protos_path))
    , with_length_delimiter(with_length_delimiter_)
    , flatten_google_wrappers(flatten_google_wrappers_)
{

@@ -98,34 +105,35 @@ void registerInputFormatProtobuf(FormatFactory & factory)
{
    for (bool with_length_delimiter : {false, true})
    {
-       factory.registerInputFormat(with_length_delimiter ? "Protobuf" : "ProtobufSingle", [with_length_delimiter](
-           ReadBuffer & buf,
-           const Block & sample,
-           IRowInputFormat::Params params,
-           const FormatSettings & settings)
-       {
-           return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params),
-               ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema),
-               with_length_delimiter,
-               settings.protobuf.input_flatten_google_wrappers);
-       });
+       factory.registerInputFormat(
+           with_length_delimiter ? "Protobuf" : "ProtobufSingle",
+           [with_length_delimiter](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings)
+           {
+               return std::make_shared<ProtobufRowInputFormat>(
+                   buf,
+                   sample,
+                   std::move(params),
+                   ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema),
+                   with_length_delimiter,
+                   settings.protobuf.input_flatten_google_wrappers,
+                   settings.protobuf.google_protos_path);
+           });
        factory.markFormatSupportsSubsetOfColumns(with_length_delimiter ? "Protobuf" : "ProtobufSingle");
    }
}

ProtobufSchemaReader::ProtobufSchemaReader(const FormatSettings & format_settings)
    : schema_info(
-         format_settings.schema.format_schema,
-         "Protobuf",
-         true,
-         format_settings.schema.is_server, format_settings.schema.format_schema_path)
+         format_settings.schema.format_schema, "Protobuf", true, format_settings.schema.is_server, format_settings.schema.format_schema_path)
    , skip_unsupported_fields(format_settings.protobuf.skip_fields_with_unsupported_types_in_schema_inference)
+   , google_protos_path(format_settings.protobuf.google_protos_path)
{
}

NamesAndTypesList ProtobufSchemaReader::readSchema()
{
-   const auto * message_descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::No);
+   const auto * message_descriptor
+       = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::No, google_protos_path);
    return protobufSchemaToCHSchema(message_descriptor, skip_unsupported_fields);
}
@@ -35,7 +35,8 @@ public:
    const Params & params_,
    const ProtobufSchemaInfo & schema_info_,
    bool with_length_delimiter_,
-   bool flatten_google_wrappers_);
+   bool flatten_google_wrappers_,
+   const String & google_protos_path);

    String getName() const override { return "ProtobufRowInputFormat"; }

@@ -71,6 +72,7 @@ public:
private:
    const FormatSchemaInfo schema_info;
    bool skip_unsupported_fields;
+   String google_protos_path;
};

}
@@ -27,7 +27,8 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat(
    , serializer(ProtobufSerializer::create(
          header_.getNames(),
          header_.getDataTypes(),
-         *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No),
+         *ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+             schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No, settings_.protobuf.google_protos_path),
          with_length_delimiter_,
          /* with_envelope = */ false,
          settings_.protobuf.output_nullables_with_google_wrappers,
@@ -958,7 +958,7 @@ void WindowTransform::updateAggregationState()
            auto * columns = ws.argument_columns.data();
            // Removing arena.get() from the loop makes it faster somehow...
            auto * arena_ptr = arena.get();
-           a->addBatchSinglePlaceFromInterval(first_row, past_the_end_row, buf, columns, arena_ptr);
+           a->addBatchSinglePlace(first_row, past_the_end_row, buf, columns, arena_ptr);
        }
    }
}
@@ -45,6 +45,7 @@
#include <Poco/StreamCopier.h>
#include <Poco/String.h>
#include <Poco/Net/SocketAddress.h>
+#include <Poco/Net/NameValueCollection.h>

#include <chrono>
#include <sstream>

@@ -502,7 +503,7 @@ bool HTTPHandler::authenticateUser(
    else if (request.getMethod() == HTTPServerRequest::HTTP_POST)
        http_method = ClientInfo::HTTPMethod::POST;

-   session->setHttpClientInfo(http_method, request.get("User-Agent", ""), request.get("Referer", ""));
+   session->setHttpClientInfo(http_method, request.get("User-Agent", ""), request.get("Referer", ""), request);
    session->setForwardedFor(request.get("X-Forwarded-For", ""));
    session->setQuotaClientKey(quota_key);
@@ -60,7 +60,7 @@ bool ColumnDescription::identical(const ColumnDescription & other) const
    return name == other.name
        && type->identical(*other.type)
        && default_desc == other.default_desc
        && comment == other.comment
        && ast_to_str(codec) == ast_to_str(other.codec)
        && ast_to_str(ttl) == ast_to_str(other.ttl);
}

@@ -72,7 +71,6 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const
    return name == other.name
        && type->equals(*other.type)
        && default_desc == other.default_desc
        && comment == other.comment
        && ast_to_str(codec) == ast_to_str(other.codec)
        && ast_to_str(ttl) == ast_to_str(other.ttl);
}
695
src/Storages/StorageFuzzJSON.cpp
Normal file
@@ -0,0 +1,695 @@
#include <list>
#include <optional>
#include <random>
#include <string_view>
#include <unordered_set>
#include <Columns/ColumnString.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/StorageFactory.h>
#include <Storages/StorageFuzzJSON.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Common/JSONParsers/RapidJSONParser.h>
#include <Common/JSONParsers/SimdJSONParser.h>
#include <Common/checkStackSize.h>

namespace DB
{

namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
extern const int INCORRECT_DATA;
}

namespace
{

using uniform = std::uniform_int_distribution<size_t>;

struct JSONNode;
using JSONNodeList = std::list<std::shared_ptr<JSONNode>>;

struct JSONValue
{
    enum class Type : size_t
    {
        Fixed = 0,
        Array = 1,
        Object = 2,
    };

    static Type getType(const JSONValue & v);

    // The node value must be one of the following:
    // Examples: 5, true, "abc"
    std::optional<Field> fixed;
    // Examples: [], ["a"], [1, true]
    std::optional<JSONNodeList> array;
    // Examples: {}, {"a": [1,2], "b": "c"}
    std::optional<JSONNodeList> object;
};

JSONValue::Type JSONValue::getType(const JSONValue & v)
{
    if (v.fixed)
    {
        assert(!v.array);
        assert(!v.object);
        return JSONValue::Type::Fixed;
    }
    else if (v.array)
    {
        assert(!v.fixed);
        assert(!v.object);
        return JSONValue::Type::Array;
    }
    else if (v.object)
    {
        assert(!v.fixed);
        assert(!v.array);
        return JSONValue::Type::Object;
    }
    else
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to determine JSON node type.");
}

// A node represents either a JSON field (a key-value pair) or a JSON value.
// The key is not set for the JSON root and for the array items.
struct JSONNode
{
    std::optional<String> key;
    JSONValue value;
};

#if USE_SIMDJSON
using ParserImpl = DB::SimdJSONParser;
#elif USE_RAPIDJSON
using ParserImpl = DB::RapidJSONParser;
#endif

std::optional<Field> getFixedValue(const ParserImpl::Element & e)
{
    return e.isBool() ? e.getBool()
        : e.isInt64() ? e.getInt64()
        : e.isUInt64() ? e.getUInt64()
        : e.isDouble() ? e.getDouble()
        : e.isString() ? e.getString()
        : e.isNull() ? Field()
                     : std::optional<Field>();
}

void traverse(const ParserImpl::Element & e, std::shared_ptr<JSONNode> node)
{
    checkStackSize();

    assert(node);

    auto & val = node->value;
    if (e.isObject())
    {
        const auto & obj = e.getObject();
        if (!val.object)
            val.object = JSONNodeList{};

        for (const auto [k, v] : obj)
        {
            auto child = std::make_shared<JSONNode>();
            child->key = k;
            traverse(v, child);
            val.object->push_back(child);
        }
    }
    else if (e.isArray())
    {
        if (!val.array)
            val.array = JSONNodeList{};

        const auto arr = e.getArray();
        for (const auto a : arr)
        {
            auto child = std::make_shared<JSONNode>();
            traverse(a, child);
            val.array->push_back(child);
        }
    }
    else
    {
        auto field = getFixedValue(e);
        if (!field)
            throw Exception(ErrorCodes::INCORRECT_DATA, "Failed to parse a fixed JSON value.");

        val.fixed = std::move(field);
    }
}

std::shared_ptr<JSONNode> parseJSON(const String & json)
{
    std::string_view view{json.begin(), json.end()};
    ParserImpl::Element document;
    ParserImpl p;

    if (!p.parse(json, document))
        throw Exception(ErrorCodes::INCORRECT_DATA, "Failed to parse JSON string.");

    auto root = std::make_shared<JSONNode>();
    traverse(document, root);
    return root;
}
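
// Worked example of the tree built by parseJSON (illustrative input):
// parseJSON(R"({"a": [1, true]})") produces
//   root       JSONNode{key = nullopt, value.object = [node_a]}
//   node_a     JSONNode{key = "a",     value.array  = [node_1, node_true]}
//   node_1     value.fixed = Field(UInt64(1)); node_true: value.fixed = Field(true)
// Only object members carry keys, as described in the JSONNode comment above.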

char generateRandomCharacter(pcg64 & rnd, const std::string_view & charset)
{
    assert(!charset.empty());
    auto idx = uniform(0, charset.size() - 1)(rnd);
    return charset[idx];
}

char generateRandomKeyCharacter(pcg64 & rnd)
{
    static constexpr std::string_view charset = "0123456789"
                                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                                "abcdefghijklmnopqrstuvwxyz";
    return generateRandomCharacter(rnd, charset);
}

char generateRandomStringValueCharacter(pcg64 & rnd)
{
    static constexpr std::string_view charset = "0123456789"
                                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                                "abcdefghijklmnopqrstuvwxyz"
                                                "!@#$%^&*-+_";
    return generateRandomCharacter(rnd, charset);
}

String generateRandomStringValue(UInt64 min_length, UInt64 max_length, pcg64 & rnd)
{
    size_t size = min_length + rnd() % (max_length - min_length + 1);
    String res;
    res.reserve(size);
    for (size_t i = 0; i < size; ++i)
        res.push_back(generateRandomStringValueCharacter(rnd));

    return res;
}

String generateRandomKey(UInt64 min_length, UInt64 max_length, pcg64 & rnd)
{
    size_t size = min_length + rnd() % (max_length - min_length + 1);
    String res;
    res.reserve(size);
    for (size_t i = 0; i < size; ++i)
        res.push_back(generateRandomKeyCharacter(rnd));

    return res;
}
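
// Note the two RNG idioms above: character selection goes through
// uniform_int_distribution (uniform by construction), while the length
// formula min_length + rnd() % (max_length - min_length + 1) carries a tiny
// modulo bias; harmless for fuzzing, but worth knowing when reusing the code.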

enum class FuzzAction : size_t
{
    Skip = 0,
    Edit = 1,
    Add = 2,
    Delete = 3,
};

Field generateRandomFixedValue(const StorageFuzzJSON::Configuration & config, pcg64 & rnd)
{
    // TODO (@jkartseva): support more field types.
    static std::array<Field::Types::Which, 3> possible_types{
        Field::Types::Which::UInt64, Field::Types::Which::String, Field::Types::Which::Bool};

    Field f;
    auto idx = rnd() % possible_types.size();
    switch (possible_types[idx])
    {
        case Field::Types::Which::UInt64: {
            f = rnd();
            break;
        }
        case Field::Types::Which::String:
            f = generateRandomStringValue(/*min_length*/ 0, config.max_string_value_length, rnd);
            break;
        case Field::Types::Which::Bool:
            f = bool(rnd() % 2);
            break;
        default:
            break;
    }
    return f;
}

String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & source)
{
    String result;
    result.reserve(config.max_key_length);

    using FA = FuzzAction;
    auto get_action = [&]() -> FuzzAction
    {
        static constexpr std::array<FuzzAction, 4> actions{FA::Skip, FA::Edit, FA::Add, FA::Delete};
        return actions[uniform(0, 3)(rnd)];
    };

    size_t i = 0;
    while (i < source.size() && result.size() < config.max_key_length)
    {
        auto action = get_action();
        switch (action)
        {
            case FA::Skip: {
                result.push_back(source[i++]);
            }
            break;
            case FA::Edit: {
                result.push_back(generateRandomKeyCharacter(rnd));
                ++i;
            }
            break;
            case FA::Add: {
                result.push_back(generateRandomKeyCharacter(rnd));
            }
            break;
            default:
                ++i;
        }
    }

    while (result.size() < config.min_key_length)
        result.push_back(generateRandomKeyCharacter(rnd));

    return result;
}

std::shared_ptr<JSONNode>
generateRandomJSONNode(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, bool with_key, JSONValue::Type type)
{
    auto node = std::make_shared<JSONNode>();

    if (with_key)
        node->key = generateRandomKey(config.min_key_length, config.max_key_length, rnd);

    auto & val = node->value;
    switch (type)
    {
        case JSONValue::Type::Fixed: {
            val.fixed = generateRandomFixedValue(config, rnd);
            break;
        }
        case JSONValue::Type::Array: {
            val.array = JSONNodeList{};
            break;
        }
        case JSONValue::Type::Object: {
            val.object = JSONNodeList{};
            break;
        }
    }
    return node;
}

template <size_t n>
std::shared_ptr<JSONNode> generateRandomJSONNode(
    const StorageFuzzJSON::Configuration & config, pcg64 & rnd, bool with_key, const std::array<JSONValue::Type, n> & possible_types)
{
    auto type = possible_types[uniform(0, possible_types.size() - 1)(rnd)];
    return generateRandomJSONNode(config, rnd, with_key, type);
}

std::shared_ptr<JSONNode> generateRandomJSONNode(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, bool with_key, size_t depth)
{
    if (depth >= config.max_nesting_level)
        return generateRandomJSONNode(config, rnd, with_key, JSONValue::Type::Fixed);

    static constexpr std::array<JSONValue::Type, 3> possible_types
        = {JSONValue::Type::Fixed, JSONValue::Type::Array, JSONValue::Type::Object};
    return generateRandomJSONNode(config, rnd, with_key, possible_types);
}

JSONNode & fuzzSingleJSONNode(JSONNode & n, const StorageFuzzJSON::Configuration & config, pcg64 & rnd, size_t depth, size_t & node_count)
{
    auto & val = n.value;

    static constexpr size_t update_key = 1;
    static constexpr size_t update_value = 2;

    auto action = 1 + rnd() % static_cast<size_t>(update_key | update_value);
    if (n.key && (action & update_key))
        n.key = fuzzJSONKey(config, rnd, *n.key);

    if ((action & update_value) == 0)
        return n;

    if (val.fixed)
        val.fixed = generateRandomFixedValue(config, rnd);
    else if (val.array && val.array->size() < config.max_array_size && node_count + val.array->size() < config.value_number_limit)
    {
        if (val.array->empty())
            val.array->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ false, depth));
        else
        {
            // Use the type of the preceding element.
            const auto & prev = val.array->back();
            auto value_type = JSONValue::getType(prev->value);
            val.array->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ false, value_type));
        }
        ++node_count;
    }
    else if (val.object && val.object->size() < config.max_object_size && node_count + val.object->size() < config.value_number_limit)
    {
        val.object->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ true, depth));
        ++node_count;
    }

    return n;
}
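
// Arithmetic note for the action selection above: update_key | update_value == 3,
// so `1 + rnd() % 3` yields 1, 2 or 3, i.e. mutate only the key (1), only the
// value (2), or both (3), the three outcomes being equally likely.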


void fuzzJSONObject(
    const std::shared_ptr<JSONNode> & node,
    WriteBuffer & out,
    const StorageFuzzJSON::Configuration & config,
    pcg64 & rnd,
    size_t depth,
    size_t & node_count)
{
    checkStackSize();

    ++node_count;

    bool should_fuzz = rnd() % 100 < 100 * config.probability;

    const auto & next_node = should_fuzz && !config.should_reuse_output ? std::make_shared<JSONNode>(*node) : node;

    if (should_fuzz)
        fuzzSingleJSONNode(*next_node, config, rnd, depth, node_count);

    if (next_node->key)
    {
        writeDoubleQuoted(*next_node->key, out);
        out << ":";
    }

    auto & val = next_node->value;

    if (val.fixed)
    {
        if (val.fixed->getType() == Field::Types::Which::String)
            writeDoubleQuoted(val.fixed->get<String>(), out);
        else
            writeFieldText(*val.fixed, out);
    }
    else
    {
        if (!val.array && !val.object)
            return;

        const auto & [op, cl, node_list] = val.array ? std::make_tuple('[', ']', *val.array) : std::make_tuple('{', '}', *val.object);

        out << op;

        bool first = true;
        for (const auto & ptr : node_list)
        {
            if (node_count >= config.value_number_limit)
                break;

            WriteBufferFromOwnString child_out;
            if (!first)
                child_out << ", ";
            first = false;

            fuzzJSONObject(ptr, child_out, config, rnd, depth + 1, node_count);
            // Should not exceed the maximum length of the output string.
            if (out.count() + child_out.count() >= config.max_output_length)
                break;
            out << child_out.str();
        }
        out << cl;
    }
}

void fuzzJSONObject(std::shared_ptr<JSONNode> n, WriteBuffer & out, const StorageFuzzJSON::Configuration & config, pcg64 & rnd)
{
    size_t node_count = 0;
    return fuzzJSONObject(n, out, config, rnd, /*depth*/ 0, node_count);
}

class FuzzJSONSource : public ISource
{
public:
    FuzzJSONSource(
        UInt64 block_size_, Block block_header_, const StorageFuzzJSON::Configuration & config_, std::shared_ptr<JSONNode> json_root_)
        : ISource(block_header_)
        , block_size(block_size_)
        , block_header(std::move(block_header_))
        , config(config_)
        , rnd(config.random_seed)
        , json_root(json_root_)
    {
    }
    String getName() const override { return "FuzzJSON"; }

protected:
    Chunk generate() override
    {
        Columns columns;
        columns.reserve(block_header.columns());
        columns.emplace_back(createColumn());

        return {std::move(columns), block_size};
    }

private:
    ColumnPtr createColumn();

    UInt64 block_size;
    Block block_header;

    StorageFuzzJSON::Configuration config;
    pcg64 rnd;

    std::shared_ptr<JSONNode> json_root;
};

ColumnPtr FuzzJSONSource::createColumn()
{
    auto column = ColumnString::create();
    ColumnString::Chars & data_to = column->getChars();
    ColumnString::Offsets & offsets_to = column->getOffsets();

    offsets_to.resize(block_size);
    IColumn::Offset offset = 0;

    for (size_t row_num = 0; row_num < block_size; ++row_num)
    {
        WriteBufferFromOwnString out;
        fuzzJSONObject(json_root, out, config, rnd);

        auto data = out.str();
        size_t data_len = data.size();

        IColumn::Offset next_offset = offset + data_len + 1;
        data_to.resize(next_offset);

        std::copy(data.begin(), data.end(), &data_to[offset]);

        data_to[offset + data_len] = 0;
        offsets_to[row_num] = next_offset;

        offset = next_offset;
    }

    return column;
}

}

StorageFuzzJSON::StorageFuzzJSON(
    const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_)
    : IStorage(table_id_), config(config_)
{
    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(columns_);
    storage_metadata.setComment(comment_);
    setInMemoryMetadata(storage_metadata);
}

Pipe StorageFuzzJSON::read(
    const Names & column_names,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & /*query_info*/,
    ContextPtr /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    size_t num_streams)
{
    storage_snapshot->check(column_names);

    Pipes pipes;
    pipes.reserve(num_streams);

    const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns();
    Block block_header;
    for (const auto & name : column_names)
    {
        const auto & name_type = our_columns.get(name);
        MutableColumnPtr column = name_type.type->createColumn();
        block_header.insert({std::move(column), name_type.type, name_type.name});
    }

    for (UInt64 i = 0; i < num_streams; ++i)
        pipes.emplace_back(std::make_shared<FuzzJSONSource>(max_block_size, block_header, config, parseJSON(config.json_str)));

    return Pipe::unitePipes(std::move(pipes));
}

static constexpr std::array<std::string_view, 13> optional_configuration_keys
    = {"json_str",
       "random_seed",
       "reuse_output",
       "probability",
       "max_output_length",
       "max_nesting_level",
       "max_array_size",
       "max_object_size",
       "max_string_value_length",
       "min_key_length",
       "max_key_length"};

void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection)
{
    validateNamedCollection(
        collection,
        std::unordered_set<std::string>(),
        std::unordered_set<std::string>(optional_configuration_keys.begin(), optional_configuration_keys.end()));

    if (collection.has("json_str"))
        configuration.json_str = collection.get<String>("json_str");

    if (collection.has("random_seed"))
        configuration.random_seed = collection.get<UInt64>("random_seed");

    if (collection.has("reuse_output"))
        configuration.should_reuse_output = static_cast<bool>(collection.get<UInt64>("reuse_output"));

    if (collection.has("probability"))
    {
        configuration.probability = collection.get<Float64>("probability");

        if (configuration.probability < 0.0 || configuration.probability > 1.0)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the 'probability' argument must be within the interval [0, 1].");
    }

    if (collection.has("max_output_length"))
    {
        configuration.max_output_length = collection.get<UInt64>("max_output_length");

        if (configuration.max_output_length < 2 || configuration.max_output_length > configuration.output_length_limit)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "The value of the 'max_output_length' argument must be within the interval [2, {}].",
                configuration.output_length_limit);
    }

    if (collection.has("max_nesting_level"))
        configuration.max_nesting_level = collection.get<UInt64>("max_nesting_level");

    if (collection.has("max_array_size"))
        configuration.max_array_size = collection.get<UInt64>("max_array_size");

    if (collection.has("max_object_size"))
        configuration.max_object_size = collection.get<UInt64>("max_object_size");

    if (collection.has("max_string_value_length"))
    {
        auto max_string_value_length = collection.get<UInt64>("max_string_value_length");
        if (max_string_value_length > configuration.output_length_limit)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "The value of the 'max_string_value_length' argument must be at most {}.",
                configuration.output_length_limit);

        configuration.max_string_value_length = std::min(max_string_value_length, configuration.max_output_length);
    }

    if (collection.has("max_key_length"))
    {
        auto max_key_length = collection.get<UInt64>("max_key_length");
        if (max_key_length > configuration.output_length_limit)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "The value of the 'max_key_length' argument must be less than or equal to {}.",
                configuration.output_length_limit);
        configuration.max_key_length = std::min(max_key_length, configuration.max_output_length);
        configuration.min_key_length = std::min(configuration.min_key_length, configuration.max_key_length);
    }

    if (collection.has("min_key_length"))
    {
        auto min_key_length = collection.get<UInt64>("min_key_length");
        if (min_key_length == 0)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the 'min_key_length' argument must be at least 1.");

        if (collection.has("max_key_length") && collection.get<UInt64>("max_key_length") < min_key_length)
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "The value of the 'min_key_length' argument must be less than or equal to "
                "the value of the 'max_key_length' argument.");

        configuration.min_key_length = min_key_length;
        configuration.max_key_length = std::max(configuration.max_key_length, configuration.min_key_length);
    }
}

StorageFuzzJSON::Configuration StorageFuzzJSON::getConfiguration(ASTs & engine_args, ContextPtr local_context)
{
    StorageFuzzJSON::Configuration configuration{};

    if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context))
    {
        StorageFuzzJSON::processNamedCollectionResult(configuration, *named_collection);
    }
    else
    {
        // Supported signatures:
        //
        // FuzzJSON('json_str')
        // FuzzJSON('json_str', 'random_seed')
        if (engine_args.empty() || engine_args.size() > 2)
            throw Exception(
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                "FuzzJSON requires 1 to 2 arguments: "
                "json_str, random_seed");
        for (auto & engine_arg : engine_args)
            engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context);

        auto first_arg = checkAndGetLiteralArgument<String>(engine_args[0], "json_str");
        configuration.json_str = std::move(first_arg);

        if (engine_args.size() == 2)
        {
            const auto & literal = engine_args[1]->as<const ASTLiteral &>();
            if (!literal.value.isNull())
                configuration.random_seed = checkAndGetLiteralArgument<UInt64>(literal, "random_seed");
        }
    }
    return configuration;
}

void registerStorageFuzzJSON(StorageFactory & factory)
{
    factory.registerStorage(
        "FuzzJSON",
        [](const StorageFactory::Arguments & args) -> std::shared_ptr<StorageFuzzJSON>
        {
            ASTs & engine_args = args.engine_args;

            if (engine_args.empty())
                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzJSON must have arguments.");

            StorageFuzzJSON::Configuration configuration = StorageFuzzJSON::getConfiguration(engine_args, args.getLocalContext());
            return std::make_shared<StorageFuzzJSON>(args.table_id, args.columns, args.comment, configuration);
        });
}

}
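
Per the supported signatures documented in getConfiguration above, such a table would be declared with ENGINE = FuzzJSON('json_str') or ENGINE = FuzzJSON('json_str', 'random_seed'); every row read from it is then a freshly perturbed serialization of the seed JSON document.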
68
src/Storages/StorageFuzzJSON.h
Normal file
@ -0,0 +1,68 @@
#pragma once

#include <Storages/IStorage.h>
#include <Storages/StorageConfiguration.h>
#include <Common/randomSeed.h>

namespace DB
{

class NamedCollection;

class StorageFuzzJSON final : public IStorage
{
public:
    struct Configuration : public StatelessTableEngineConfiguration
    {
        // A full N-ary tree may be memory-intensive as it can potentially contain
        // up to (B^(D + 1) - 1) / (B - 1) nodes, where B is the number of branches,
        // and D is the depth of the tree. Therefore, a value number limit is introduced.
        // This limit includes complex values (arrays and nested objects).
        static constexpr UInt64 value_number_limit = 1000;
        static constexpr UInt64 output_length_limit = 1LU << 16;

        String json_str = "{}";
        UInt64 random_seed = randomSeed();
        bool should_reuse_output = false;
        Float64 probability = 0.25;

        UInt64 max_output_length = 1024;

        // Key parameters
        UInt64 min_key_length = 4;
        UInt64 max_key_length = 20;

        // Value parameters
        // Maximum number of fields (key-value pairs) at each level of a JSON.
        UInt64 max_object_size = 10;
        // Maximum number of elements within a JSON array.
        UInt64 max_array_size = 10;
        // Max depth of nested structures. How deeply objects or arrays can be
        // nested within one another.
        UInt64 max_nesting_level = 5;
        UInt64 max_string_value_length = 32;
    };

    StorageFuzzJSON(
        const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_);

    std::string getName() const override { return "FuzzJSON"; }

    Pipe read(
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        size_t num_streams) override;

    static void processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection);

    static StorageFuzzJSON::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context);

private:
    const Configuration config;
};

}
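
A worked instance of the node bound from the Configuration comment above (an illustration, not part of the commit): with B = 10 branches, matching the defaults max_object_size = 10 and max_array_size = 10, and depth D = 5, matching max_nesting_level = 5, a full tree could hold up to

    (B^(D + 1) - 1) / (B - 1) = (10^6 - 1) / 9 = 111111 nodes,

which is why the much stricter value_number_limit = 1000 is what effectively bounds generated documents.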
@ -1803,6 +1803,9 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd

    while (true)
    {
        if (shutdown_called || partial_shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot commit part because shutdown called");

        Coordination::Requests ops;
        size_t num_check_ops;
        getOpsToCheckPartChecksumsAndCommit(zookeeper, part, hardlinked_files, replace_zero_copy_lock, ops, num_check_ops);
@ -2833,6 +2836,9 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
    /// we can possibly duplicate entries in queue of cloned replica.
    while (true)
    {
        if (shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot clone replica because shutdown called");

        Coordination::Stat log_pointer_stat;
        String raw_log_pointer = zookeeper->get(fs::path(source_path) / "log_pointer", &log_pointer_stat);

@ -3199,6 +3205,9 @@ void StorageReplicatedMergeTree::cloneMetadataIfNeeded(const String & source_rep
    String source_columns;
    while (true)
    {
        if (shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot clone metadata because shutdown called");

        Coordination::Stat metadata_stat;
        Coordination::Stat columns_stat;
        source_metadata = zookeeper->get(source_path + "/metadata", &metadata_stat);
@ -4028,6 +4037,8 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n

    while (true)
    {
        if (shutdown_called || partial_shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot remove part because shutdown called");

        Coordination::Requests ops;

@ -4525,6 +4536,9 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id)

    while (true)
    {
        if (shutdown_called || partial_shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot clean last part node because shutdown called");

        Coordination::Stat added_parts_stat;
        String old_added_parts = zookeeper->get(quorum_last_part_path, &added_parts_stat);

@ -7256,6 +7270,9 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte
    /// Should work well if the number of concurrent mutation requests is small.
    while (true)
    {
        if (shutdown_called || partial_shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot assign mutation because shutdown called");

        Coordination::Stat mutations_stat;
        zookeeper->get(mutations_path, &mutations_stat);

@ -8526,6 +8543,9 @@ bool StorageReplicatedMergeTree::dropPartImpl(

    while (true)
    {
        if (shutdown_called || partial_shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot drop part because shutdown called");

        ReplicatedMergeTreeMergePredicate merge_pred = queue.getMergePredicate(zookeeper, PartitionIdsHint{part_info.partition_id});

        auto part = getPartIfExists(part_info, {MergeTreeDataPartState::Active});
@ -9852,6 +9872,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP

    while (true)
    {
        if (shutdown_called || partial_shutdown_called)
            throw Exception(ErrorCodes::ABORTED, "Cannot create an empty part because shutdown called");

        /// We should be careful when creating an empty part, because we are not sure that this part is still needed.
        /// For example, it's possible that part (or partition) was dropped (or replaced) concurrently.
        /// We can enqueue part for check from DataPartExchange or SelectProcessor
@ -25,6 +25,7 @@ void registerStorageLiveView(StorageFactory & factory);
void registerStorageGenerateRandom(StorageFactory & factory);
void registerStorageExecutable(StorageFactory & factory);
void registerStorageWindowView(StorageFactory & factory);
void registerStorageFuzzJSON(StorageFactory & factory);

#if USE_AWS_S3
void registerStorageS3(StorageFactory & factory);
@ -123,8 +124,9 @@ void registerStorages()
    registerStorageGenerateRandom(factory);
    registerStorageExecutable(factory);
    registerStorageWindowView(factory);
    registerStorageFuzzJSON(factory);

#if USE_AWS_S3
    registerStorageS3(factory);
    registerStorageCOS(factory);
    registerStorageOSS(factory);
@ -28,3 +28,11 @@ endif ()
if (TARGET ch_contrib::azure_sdk)
    target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::azure_sdk)
endif ()

if (TARGET ch_contrib::simdjson)
    target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::simdjson)
endif ()

if (TARGET ch_contrib::rapidjson)
    target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::rapidjson)
endif ()
src/TableFunctions/TableFunctionFuzzJSON.cpp (Normal file, 59 lines)
@ -0,0 +1,59 @@
#include <TableFunctions/TableFunctionFuzzJSON.h>

#include <DataTypes/DataTypeString.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <TableFunctions/TableFunctionFactory.h>

namespace DB
{

namespace ErrorCodes
{

extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;

}

void TableFunctionFuzzJSON::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
    ASTs & args_func = ast_function->children;

    if (args_func.size() != 1)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName());

    auto args = args_func.at(0)->children;
    configuration = StorageFuzzJSON::getConfiguration(args, context);
}

ColumnsDescription TableFunctionFuzzJSON::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const
{
    return ColumnsDescription{{"json", std::make_shared<DataTypeString>()}};
}

StoragePtr TableFunctionFuzzJSON::executeImpl(
    const ASTPtr & /*ast_function*/,
    ContextPtr context,
    const std::string & table_name,
    ColumnsDescription /*cached_columns*/,
    bool is_insert_query) const
{
    ColumnsDescription columns = getActualTableStructure(context, is_insert_query);
    auto res = std::make_shared<StorageFuzzJSON>(
        StorageID(getDatabaseName(), table_name),
        columns,
        /* comment */ String{},
        configuration);
    res->startup();
    return res;
}

void registerTableFunctionFuzzJSON(TableFunctionFactory & factory)
{
    factory.registerFunction<TableFunctionFuzzJSON>(
        {.documentation
         = {.description = "Perturbs a JSON string with random variations.",
            .returned_value = "A table object with a single column containing perturbed JSON strings."},
         .allow_readonly = true});
}

}
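
A usage sketch for the table function registered above; getActualTableStructure fixes a single String column named json, so a call could look like this (the input string and seed are illustrative assumptions):

    SELECT json FROM fuzzJSON('{"ClickHouse": "Is Fast"}', 12345) LIMIT 5;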
src/TableFunctions/TableFunctionFuzzJSON.h (Normal file, 39 lines)
@ -0,0 +1,39 @@
#pragma once

#include <optional>

#include <Storages/StorageFuzzJSON.h>
#include <TableFunctions/ITableFunction.h>

namespace DB
{

class TableFunctionFuzzJSON : public ITableFunction
{
public:
    static constexpr auto name = "fuzzJSON";
    std::string getName() const override { return name; }

    void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;

    ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override;

private:
    StoragePtr executeImpl(
        const ASTPtr & ast_function,
        ContextPtr context,
        const std::string & table_name,
        ColumnsDescription cached_columns,
        bool is_insert_query) const override;

    const char * getStorageTypeName() const override { return "FuzzJSON"; }

    String source;
    std::optional<UInt64> random_seed;
    std::optional<bool> should_reuse_output;
    std::optional<UInt64> max_output_length;

    StorageFuzzJSON::Configuration configuration;
};

}
@ -22,6 +22,7 @@ void registerTableFunctions()
    registerTableFunctionGenerate(factory);
    registerTableFunctionMongoDB(factory);
    registerTableFunctionRedis(factory);
    registerTableFunctionFuzzJSON(factory);

#if USE_AWS_S3
    registerTableFunctionS3(factory);

@ -19,6 +19,7 @@ void registerTableFunctionInput(TableFunctionFactory & factory);
void registerTableFunctionGenerate(TableFunctionFactory & factory);
void registerTableFunctionMongoDB(TableFunctionFactory & factory);
void registerTableFunctionRedis(TableFunctionFactory & factory);
void registerTableFunctionFuzzJSON(TableFunctionFactory & factory);

#if USE_AWS_S3
void registerTableFunctionS3(TableFunctionFactory & factory);
@ -40,3 +40,6 @@
02784_parallel_replicas_automatic_decision_join
02818_parameterized_view_with_cte_multiple_usage
02815_range_dict_no_direct_join
# Flaky. Please don't delete them without fixing them:
01600_parts_states_metrics_long
01287_max_execution_speed
@ -58,27 +58,37 @@ class ReleaseBranch:
    CHERRYPICK_DESCRIPTION = """Original pull-request #{pr_number}

This pull-request is a first step of an automated backporting.
It contains changes like after calling a local command `git cherry-pick`.
If you intend to continue backporting this changes, then resolve all conflicts if any.
It contains changes similar to calling `git cherry-pick` locally.
If you intend to continue backporting the changes, then resolve all conflicts if any.
Otherwise, if you do not want to backport them, then just close this pull-request.

The check results do not matter at this step - you can safely ignore them.

### Note

This pull-request will be merged automatically as it reaches the mergeable state, \
**do not merge it manually**. It's 100% safe, but completely meaningless.
This pull-request will be merged automatically. Please, **do not merge it manually** \
(but if you accidentally did, nothing bad will happen).

### If the PR was closed and then reopened
### Troubleshooting

If it stuck (e.g. for a day), check {pr_url} for `{backport_created_label}` *label* and \
delete it if necessary. Manually merging will do nothing, since \
`{backport_created_label}` *label* prevents the original PR {pr_url} from being \
processed.
#### If the PR was manually reopened after being closed

If the cherry-pick PR is completely screwed, and you want to recreate it: delete the \
`{label_cherrypick}` label and delete this branch.
You may also need to delete the `{backport_created_label}` label from the original PR.
If this PR is stuck (i.e. not automatically merged after one day), check {pr_url} for \
`{backport_created_label}` *label* and delete it.

Manually merging will do nothing. The `{backport_created_label}` *label* prevents the \
original PR {pr_url} from being processed.

#### If the conflicts were resolved in a wrong way

If this cherry-pick PR is completely screwed by a wrong conflicts resolution, and you \
want to recreate it:

- delete the `{label_cherrypick}` label from the PR
- delete this branch from the repository

You also need to check the original PR {pr_url} for `{backport_created_label}`, and \
delete it if it's present there
"""
    BACKPORT_DESCRIPTION = """This pull-request is the last step of an automated \
backporting.
@ -14,4 +14,8 @@

    <!-- Default timeout is 5 sec. Set it to 10 to avoid test flakiness with slow builds (debug, tsan) -->
    <sync_request_timeout>10</sync_request_timeout>

    <!-- Directory containing the proto files for the well-known Protobuf types. -->
    <google_protos_path>/usr/share/clickhouse/protos/</google_protos_path>

</config>
@ -0,0 +1,4 @@
<clickhouse>
    <get_client_http_header_forbidden_headers>FORBIDDEN-KEY1,FORBIDDEN-KEY2</get_client_http_header_forbidden_headers>
    <allow_get_client_http_header>1</allow_get_client_http_header>
</clickhouse>
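
Based on this config and on the test script later in this diff, behavior over the HTTP interface should look roughly like the following sketch (header names come from the config; the error text mirrors the test's grep pattern):

    -- Succeeds: returns the value the HTTP client sent for this header.
    SELECT getClientHTTPHeader('key1');
    -- Fails: 'FORBIDDEN-KEY1 is in get_client_http_header_forbidden_headers'.
    SELECT getClientHTTPHeader('FORBIDDEN-KEY1');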
@ -15,6 +15,7 @@ mkdir -p $DEST_SERVER_PATH/config.d/
mkdir -p $DEST_SERVER_PATH/users.d/
mkdir -p $DEST_CLIENT_PATH

ln -sf $SRC_PATH/config.d/forbidden_get_client_http_headers.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/zookeeper_write.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/listen.xml $DEST_SERVER_PATH/config.d/
ln -sf $SRC_PATH/config.d/text_log.xml $DEST_SERVER_PATH/config.d/
@ -4130,14 +4130,14 @@ class ClickHouseInstance:
            [
                "bash",
                "-c",
                "echo 'ATTACH DATABASE system ENGINE=Ordinary' > /var/lib/clickhouse/metadata/system.sql",
                "if [ ! -f /var/lib/clickhouse/metadata/system.sql ]; then echo 'ATTACH DATABASE system ENGINE=Ordinary' > /var/lib/clickhouse/metadata/system.sql; fi",
            ]
        )
        self.exec_in_container(
            [
                "bash",
                "-c",
                "echo 'ATTACH DATABASE system ENGINE=Ordinary' > /var/lib/clickhouse/metadata/default.sql",
                "if [ ! -f /var/lib/clickhouse/metadata/default.sql ]; then echo 'ATTACH DATABASE system ENGINE=Ordinary' > /var/lib/clickhouse/metadata/default.sql; fi",
            ]
        )
        self.exec_in_container(
@ -1,19 +1,21 @@
def corrupt_part_data_on_disk(node, table, part_name):
def corrupt_part_data_on_disk(node, table, part_name, file_ext=".bin", database=None):
    part_path = node.query(
        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format(
            table, part_name
        "SELECT path FROM system.parts WHERE table = '{}' and name = '{}' {}".format(
            table,
            part_name,
            f"AND database = '{database}'" if database is not None else "",
        )
    ).strip()

    corrupt_part_data_by_path(node, part_path)
    corrupt_part_data_by_path(node, part_path, file_ext)


def corrupt_part_data_by_path(node, part_path):
def corrupt_part_data_by_path(node, part_path, file_ext=".bin"):
    print("Corrupting part", part_path, "at", node.name)
    print(
        "Will corrupt: ",
        node.exec_in_container(
            ["bash", "-c", "cd {p} && ls *.bin | head -n 1".format(p=part_path)]
            ["bash", "-c", f"cd {part_path} && ls *{file_ext} | head -n 1"]
        ),
    )

@ -21,9 +23,7 @@ def corrupt_part_data_by_path(node, part_path):
        [
            "bash",
            "-c",
            "cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c 'echo \"1\" >> $1' -- {{}}".format(
                p=part_path
            ),
            f"cd {part_path} && ls *{file_ext} | head -n 1 | xargs -I{{}} sh -c 'truncate -s -1 $1' -- {{}}",
        ],
        privileged=True,
    )
@ -3,6 +3,7 @@ import pytest
|
||||
import concurrent
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
from helpers.client import QueryRuntimeException
|
||||
from helpers.corrupt_part_data_on_disk import corrupt_part_data_on_disk
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
|
||||
@ -21,22 +22,6 @@ def started_cluster():
|
||||
cluster.shutdown()
|
||||
|
||||
|
||||
def corrupt_data_part_on_disk(node, database, table, part_name):
|
||||
part_path = node.query(
|
||||
f"SELECT path FROM system.parts WHERE database = '{database}' AND table = '{table}' AND name = '{part_name}'"
|
||||
).strip()
|
||||
node.exec_in_container(
|
||||
[
|
||||
"bash",
|
||||
"-c",
|
||||
"cd {p} && ls *.bin | head -n 1 | xargs -I{{}} sh -c 'echo \"1\" >> $1' -- {{}}".format(
|
||||
p=part_path
|
||||
),
|
||||
],
|
||||
privileged=True,
|
||||
)
|
||||
|
||||
|
||||
def remove_checksums_on_disk(node, database, table, part_name):
|
||||
part_path = node.query(
|
||||
f"SELECT path FROM system.parts WHERE database = '{database}' AND table = '{table}' AND name = '{part_name}'"
|
||||
@ -59,14 +44,15 @@ def remove_part_from_disk(node, table, part_name):
|
||||
)
|
||||
|
||||
|
||||
def test_check_normal_table_corruption(started_cluster):
|
||||
@pytest.mark.parametrize("merge_tree_settings", [""])
|
||||
def test_check_normal_table_corruption(started_cluster, merge_tree_settings):
|
||||
node1.query("DROP TABLE IF EXISTS non_replicated_mt")
|
||||
|
||||
node1.query(
|
||||
"""
|
||||
f"""
|
||||
CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
|
||||
ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
|
||||
SETTINGS min_bytes_for_wide_part=0;
|
||||
{merge_tree_settings};
|
||||
"""
|
||||
)
|
||||
|
||||
@ -105,7 +91,9 @@ def test_check_normal_table_corruption(started_cluster):
|
||||
|
||||
assert node1.query("SELECT COUNT() FROM non_replicated_mt") == "2\n"
|
||||
|
||||
corrupt_data_part_on_disk(node1, "default", "non_replicated_mt", "201902_1_1_0")
|
||||
corrupt_part_data_on_disk(
|
||||
node1, "non_replicated_mt", "201902_1_1_0", database="default"
|
||||
)
|
||||
|
||||
assert node1.query(
|
||||
"CHECK TABLE non_replicated_mt",
|
||||
@ -129,7 +117,9 @@ def test_check_normal_table_corruption(started_cluster):
|
||||
== "201901_2_2_0\t1\t\n"
|
||||
)
|
||||
|
||||
corrupt_data_part_on_disk(node1, "default", "non_replicated_mt", "201901_2_2_0")
|
||||
corrupt_part_data_on_disk(
|
||||
node1, "non_replicated_mt", "201901_2_2_0", database="default"
|
||||
)
|
||||
|
||||
remove_checksums_on_disk(node1, "default", "non_replicated_mt", "201901_2_2_0")
|
||||
|
||||
@ -139,16 +129,23 @@ def test_check_normal_table_corruption(started_cluster):
|
||||
).strip().split("\t")[0:2] == ["201901_2_2_0", "0"]
|
||||
|
||||
|
||||
def test_check_replicated_table_simple(started_cluster):
|
||||
@pytest.mark.parametrize("merge_tree_settings, zk_path_suffix", [("", "_0")])
|
||||
def test_check_replicated_table_simple(
|
||||
started_cluster, merge_tree_settings, zk_path_suffix
|
||||
):
|
||||
for node in [node1, node2]:
|
||||
node.query("DROP TABLE IF EXISTS replicated_mt")
|
||||
node.query("DROP TABLE IF EXISTS replicated_mt SYNC")
|
||||
|
||||
node.query(
|
||||
"""
|
||||
CREATE TABLE replicated_mt(date Date, id UInt32, value Int32)
|
||||
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
|
||||
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_{zk_path_suffix}', '{replica}')
|
||||
PARTITION BY toYYYYMM(date) ORDER BY id
|
||||
{merge_tree_settings}
|
||||
""".format(
|
||||
replica=node.name
|
||||
replica=node.name,
|
||||
zk_path_suffix=zk_path_suffix,
|
||||
merge_tree_settings=merge_tree_settings,
|
||||
)
|
||||
)
|
||||
|
||||
@ -220,16 +217,32 @@ def test_check_replicated_table_simple(started_cluster):
|
||||
)
|
||||
|
||||
|
||||
def test_check_replicated_table_corruption(started_cluster):
|
||||
@pytest.mark.parametrize(
|
||||
"merge_tree_settings, zk_path_suffix, part_file_ext",
|
||||
[
|
||||
(
|
||||
"",
|
||||
"_0",
|
||||
".bin",
|
||||
)
|
||||
],
|
||||
)
|
||||
def test_check_replicated_table_corruption(
|
||||
started_cluster, merge_tree_settings, zk_path_suffix, part_file_ext
|
||||
):
|
||||
for node in [node1, node2]:
|
||||
node.query_with_retry("DROP TABLE IF EXISTS replicated_mt_1")
|
||||
node.query_with_retry("DROP TABLE IF EXISTS replicated_mt_1 SYNC")
|
||||
|
||||
node.query_with_retry(
|
||||
"""
|
||||
CREATE TABLE replicated_mt_1(date Date, id UInt32, value Int32)
|
||||
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_1', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id;
|
||||
ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_1_{zk_path_suffix}', '{replica}')
|
||||
PARTITION BY toYYYYMM(date) ORDER BY id
|
||||
{merge_tree_settings}
|
||||
""".format(
|
||||
replica=node.name
|
||||
replica=node.name,
|
||||
merge_tree_settings=merge_tree_settings,
|
||||
zk_path_suffix=zk_path_suffix,
|
||||
)
|
||||
)
|
||||
|
||||
@ -248,7 +261,10 @@ def test_check_replicated_table_corruption(started_cluster):
|
||||
"SELECT name from system.parts where table = 'replicated_mt_1' and partition_id = '201901' and active = 1"
|
||||
).strip()
|
||||
|
||||
corrupt_data_part_on_disk(node1, "default", "replicated_mt_1", part_name)
|
||||
corrupt_part_data_on_disk(
|
||||
node1, "replicated_mt_1", part_name, part_file_ext, database="default"
|
||||
)
|
||||
|
||||
assert node1.query(
|
||||
"CHECK TABLE replicated_mt_1 PARTITION 201901",
|
||||
settings={"check_query_single_value_result": 0, "max_threads": 1},
|
||||
|
@ -11,6 +11,7 @@
|
||||
<operation_timeout_ms>5000</operation_timeout_ms>
|
||||
<session_timeout_ms>10000</session_timeout_ms>
|
||||
<raft_logs_level>trace</raft_logs_level>
|
||||
<max_memory_usage_soft_limit>10000000</max_memory_usage_soft_limit>
|
||||
|
||||
<!-- For instant start in single node configuration -->
|
||||
<heart_beat_interval_ms>0</heart_beat_interval_ms>
|
||||
|
@ -45,12 +45,12 @@ def test_soft_limit_create(started_cluster):
|
||||
keeper_utils.wait_until_connected(started_cluster, node)
|
||||
try:
|
||||
node_zk = get_connection_zk("node")
|
||||
loop_time = 1000000
|
||||
loop_time = 10000
|
||||
node_zk.create("/test_soft_limit", b"abc")
|
||||
|
||||
for i in range(loop_time):
|
||||
node_zk.create(
|
||||
"/test_soft_limit/node_" + str(i), random_string(100).encode()
|
||||
"/test_soft_limit/node_" + str(i), random_string(1000).encode()
|
||||
)
|
||||
except ConnectionLoss:
|
||||
txn = node_zk.transaction()
|
||||
|
@ -1351,3 +1351,48 @@ def test_replicated_table_structure_alter(started_cluster):
|
||||
assert "1\t2\t3\t0\n1\t2\t3\t4\n" == dummy_node.query(
|
||||
"SELECT * FROM table_structure.rmt ORDER BY k"
|
||||
)
|
||||
|
||||
|
||||
def test_modify_comment(started_cluster):
|
||||
main_node.query(
|
||||
"CREATE DATABASE modify_comment_db ENGINE = Replicated('/test/modify_comment', 'shard1', 'replica' || '1');"
|
||||
)
|
||||
|
||||
dummy_node.query(
|
||||
"CREATE DATABASE modify_comment_db ENGINE = Replicated('/test/modify_comment', 'shard1', 'replica' || '2');"
|
||||
)
|
||||
|
||||
main_node.query(
|
||||
"CREATE TABLE modify_comment_db.modify_comment_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);"
|
||||
)
|
||||
|
||||
def restart_verify_not_readonly():
|
||||
main_node.restart_clickhouse()
|
||||
assert (
|
||||
main_node.query(
|
||||
"SELECT is_readonly FROM system.replicas WHERE table = 'modify_comment_table'"
|
||||
)
|
||||
== "0\n"
|
||||
)
|
||||
dummy_node.restart_clickhouse()
|
||||
assert (
|
||||
dummy_node.query(
|
||||
"SELECT is_readonly FROM system.replicas WHERE table = 'modify_comment_table'"
|
||||
)
|
||||
== "0\n"
|
||||
)
|
||||
|
||||
main_node.query(
|
||||
"ALTER TABLE modify_comment_db.modify_comment_table COMMENT COLUMN d 'Some comment'"
|
||||
)
|
||||
|
||||
restart_verify_not_readonly()
|
||||
|
||||
main_node.query(
|
||||
"ALTER TABLE modify_comment_db.modify_comment_table MODIFY COMMENT 'Some error comment'"
|
||||
)
|
||||
|
||||
restart_verify_not_readonly()
|
||||
|
||||
main_node.query("DROP DATABASE modify_comment_db SYNC")
|
||||
dummy_node.query("DROP DATABASE modify_comment_db SYNC")
|
||||
|
@ -8,8 +8,6 @@ SELECT count(), uniq(dummy) FROM remote('127.0.0.{2,3}', system.one) LIMIT 1 SET
|
||||
SELECT 'distributed_group_by_no_merge=2';
|
||||
SET max_distributed_connections=1;
|
||||
SET max_threads=1;
|
||||
-- breaks any(_shard_num)
|
||||
SET optimize_move_functions_out_of_any=0;
|
||||
|
||||
SELECT 'LIMIT';
|
||||
SELECT * FROM (SELECT any(_shard_num) shard_num, count(), uniq(dummy) FROM remote('127.0.0.{2,3}', system.one)) ORDER BY shard_num LIMIT 1 SETTINGS distributed_group_by_no_merge=2;
|
||||
|
@ -64,3 +64,13 @@ no formatting pattern no formatting pattern
|
||||
2022-12-08 18:11:29.000000
|
||||
2022-12-08 00:00:00.000000
|
||||
2022-12-08 00:00:00.000000
|
||||
01
|
||||
01
|
||||
02
|
||||
02
|
||||
02
|
||||
1
|
||||
01
|
||||
2
|
||||
2
|
||||
02
|
||||
|
@ -90,3 +90,15 @@ select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 0, 'UTC'), '%F %T
|
||||
select formatDateTime(toDateTime('2022-12-08 18:11:29', 'UTC'), '%F %T.%f');
|
||||
select formatDateTime(toDate32('2022-12-08 18:11:29', 'UTC'), '%F %T.%f');
|
||||
select formatDateTime(toDate('2022-12-08 18:11:29', 'UTC'), '%F %T.%f');
|
||||
|
||||
-- %c %k %l with different formatdatetime_format_without_leading_zeros
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%c') settings formatdatetime_format_without_leading_zeros = 0;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%m') settings formatdatetime_format_without_leading_zeros = 0;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%k') settings formatdatetime_format_without_leading_zeros = 0;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%l') settings formatdatetime_format_without_leading_zeros = 0;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%h') settings formatdatetime_format_without_leading_zeros = 0;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%c') settings formatdatetime_format_without_leading_zeros = 1;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%m') settings formatdatetime_format_without_leading_zeros = 1;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%k') settings formatdatetime_format_without_leading_zeros = 1;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%l') settings formatdatetime_format_without_leading_zeros = 1;
|
||||
select formatDateTime(toDateTime('2022-01-08 02:11:29', 'UTC'), '%h') settings formatdatetime_format_without_leading_zeros = 1;
|
||||
|
@ -69,7 +69,7 @@ function alter_table()
|
||||
if [ -z "$table" ]; then continue; fi
|
||||
$CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \
|
||||
"alter table $table update n = n + (select max(n) from merge(REGEXP('${CLICKHOUSE_DATABASE}.*'), '.*')) where 1 settings allow_nondeterministic_mutations=1" \
|
||||
2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED | grep -Fv "Error while executing table function merge"
|
||||
2>&1| grep -Fa "Exception: " | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv UNKNOWN_TABLE | grep -Fv TABLE_IS_READ_ONLY | grep -Fv TABLE_IS_DROPPED | grep -Fv ABORTED | grep -Fv "Error while executing table function merge"
|
||||
sleep 0.$RANDOM
|
||||
done
|
||||
}
|
||||
|
@ -1,5 +1,4 @@
|
||||
set optimize_aggregators_of_group_by_keys = 1;
|
||||
set optimize_move_functions_out_of_any = 0;
|
||||
|
||||
SELECT min(number % 2) AS a, max(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b;
|
||||
SELECT any(number % 2) AS a, anyLast(number % 3) AS b FROM numbers(10000000) GROUP BY number % 2, number % 3 ORDER BY a, b;
|
||||
|
@ -1,32 +0,0 @@
|
||||
SELECT any(number) + (any(number) * 2)
|
||||
FROM numbers(1, 2)
|
||||
3
|
||||
SELECT anyLast(number) + (anyLast(number) * 2)
|
||||
FROM numbers(1, 2)
|
||||
6
|
||||
WITH any(number) * 3 AS x
|
||||
SELECT x
|
||||
FROM numbers(1, 2)
|
||||
3
|
||||
SELECT
|
||||
anyLast(number) * 3 AS x,
|
||||
x
|
||||
FROM numbers(1, 2)
|
||||
6 6
|
||||
SELECT any(number + (number * 2))
|
||||
FROM numbers(1, 2)
|
||||
3
|
||||
SELECT anyLast(number + (number * 2))
|
||||
FROM numbers(1, 2)
|
||||
6
|
||||
WITH any(number * 3) AS x
|
||||
SELECT x
|
||||
FROM numbers(1, 2)
|
||||
3
|
||||
SELECT
|
||||
anyLast(number * 3) AS x,
|
||||
x
|
||||
FROM numbers(1, 2)
|
||||
6 6
|
||||
arrayJoin
|
||||
0 []
|
@ -1,34 +0,0 @@
|
||||
SET optimize_move_functions_out_of_any = 1;
|
||||
|
||||
EXPLAIN SYNTAX SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN SYNTAX SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN SYNTAX WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN SYNTAX SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
|
||||
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
|
||||
|
||||
SET optimize_move_functions_out_of_any = 0;
|
||||
|
||||
EXPLAIN SYNTAX SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN SYNTAX SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN SYNTAX WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN SYNTAX SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
|
||||
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
|
||||
|
||||
SELECT 'arrayJoin';
|
||||
SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number;
|
@ -1,8 +0,0 @@
|
||||
"n"
|
||||
0
|
||||
SELECT any(number) * any(number) AS n
|
||||
FROM numbers(100)
|
||||
"n"
|
||||
0,0
|
||||
SELECT (any(number), any(number) * 2) AS n
|
||||
FROM numbers(100)
|
@ -1,7 +0,0 @@
|
||||
SET optimize_move_functions_out_of_any = 1;
|
||||
|
||||
SELECT any(number * number) AS n FROM numbers(100) FORMAT CSVWithNames;
|
||||
EXPLAIN SYNTAX SELECT any(number * number) AS n FROM numbers(100);
|
||||
|
||||
SELECT any((number, number * 2)) as n FROM numbers(100) FORMAT CSVWithNames;
|
||||
EXPLAIN SYNTAX SELECT any((number, number * 2)) as n FROM numbers(100);
|
@ -1,19 +0,0 @@
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
CREATE TABLE test
|
||||
(
|
||||
`Source.C1` Array(UInt64),
|
||||
`Source.C2` Array(UInt64)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY tuple();
|
||||
|
||||
SET enable_positional_arguments=0;
|
||||
SET optimize_move_functions_out_of_any = 1;
|
||||
|
||||
SELECT any(arrayFilter((c, d) -> (4 = d), `Source.C1`, `Source.C2`)[1]) AS x
|
||||
FROM test
|
||||
WHERE 0
|
||||
GROUP BY 42;
|
||||
|
||||
DROP TABLE test;
|
@ -1,10 +1,8 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
other
|
||||
google
|
||||
1
|
||||
1
|
||||
2
|
||||
other
|
||||
other
|
||||
|
@ -1,11 +1,9 @@
|
||||
-- Tags: distributed
|
||||
|
||||
SET optimize_move_functions_out_of_any = 1;
|
||||
SET optimize_injective_functions_inside_uniq = 1;
|
||||
SET optimize_arithmetic_operations_in_aggregate_functions = 1;
|
||||
SET optimize_if_transform_strings_to_enum = 1;
|
||||
|
||||
SELECT any(number + 1) FROM numbers(1);
|
||||
SELECT uniq(bitNot(number)) FROM numbers(1);
|
||||
SELECT sum(number + 1) FROM numbers(1);
|
||||
SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM numbers(1);
|
||||
@ -20,7 +18,6 @@ CREATE TABLE dist AS local_table ENGINE = Distributed(test_cluster_two_shards_lo
|
||||
|
||||
INSERT INTO local_table SELECT number FROM numbers(1);
|
||||
|
||||
SELECT any(number + 1) FROM dist;
|
||||
SELECT uniq(bitNot(number)) FROM dist;
|
||||
SELECT sum(number + 1) FROM dist;
|
||||
SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM dist;
|
||||
|
@ -1 +0,0 @@
|
||||
Hello
|
@ -1,6 +0,0 @@
|
||||
SELECT any(nullIf(s, '')) FROM (SELECT arrayJoin(['', 'Hello']) AS s);
|
||||
|
||||
SET optimize_move_functions_out_of_any = 0;
|
||||
EXPLAIN SYNTAX select any(nullIf('', ''), 'some text'); -- { serverError 42 }
|
||||
SET optimize_move_functions_out_of_any = 1;
|
||||
EXPLAIN SYNTAX select any(nullIf('', ''), 'some text'); -- { serverError 42 }
|
@ -90,7 +90,7 @@ hexdump -C $BINARY_FILE_PATH
|
||||
|
||||
echo
|
||||
echo "Decoded with protoc:"
|
||||
(cd $SCHEMADIR && $PROTOC_BINARY --decode Message "$PROTOBUF_FILE_NAME".proto) < $BINARY_FILE_PATH
|
||||
(cd $SCHEMADIR && $PROTOC_BINARY --proto_path=. --proto_path=/usr/share/clickhouse/protos --decode Message "$PROTOBUF_FILE_NAME".proto) < $BINARY_FILE_PATH
|
||||
|
||||
echo
|
||||
echo "Proto message with wrapper for (NULL, 1), ('', 2), ('str', 3):"
|
||||
|
@ -320,6 +320,7 @@ geoDistance
|
||||
geohashDecode
|
||||
geohashEncode
|
||||
geohashesInBox
|
||||
getClientHTTPHeader
|
||||
getMacro
|
||||
getOSKernelVersion
|
||||
getServerPort
|
||||
|
@ -243,3 +243,30 @@ select parseDateTime('12 AM'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH
|
||||
select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
-- Fuzzer crash bug #53715
|
||||
select parseDateTime('', '', toString(number)) from numbers(13); -- { serverError ILLEGAL_COLUMN }
|
||||
-- %h
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32 PM', '%b %e, %G, %h:%i:%s %p', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME }
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32 PM', '%b %e, %G, %h:%i:%s %p', 'UTC');
|
||||
2022-08-13 19:58:32
|
||||
-- %l accepts single or double digits inputs
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32 PM', '%b %e, %G, %l:%i:%s %p', 'UTC');
|
||||
2022-08-13 19:58:32
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32 PM', '%b %e, %G, %l:%i:%s %p', 'UTC');
|
||||
2022-08-13 19:58:32
|
||||
-- %H
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32', '%b %e, %G, %H:%i:%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME }
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32', '%b %e, %G, %H:%i:%s', 'UTC');
|
||||
2022-08-13 07:58:32
|
||||
-- %k accepts single or double digits inputs
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32', '%b %e, %G, %k:%i:%s', 'UTC');
|
||||
2022-08-13 07:58:32
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32', '%b %e, %G, %k:%i:%s', 'UTC');
|
||||
2022-08-13 07:58:32
|
||||
-- %m
|
||||
select parseDateTime('8 13, 2022, 7:58:32', '%m %e, %G, %k:%i:%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME }
|
||||
select parseDateTime('08 13, 2022, 07:58:32', '%m %e, %G, %k:%i:%s', 'UTC');
|
||||
2022-08-13 07:58:32
|
||||
-- %c accepts single or double digits inputs
|
||||
select parseDateTime('8 13, 2022, 7:58:32', '%c %e, %G, %k:%i:%s', 'UTC');
|
||||
2022-08-13 07:58:32
|
||||
select parseDateTime('08 13, 2022, 07:58:32', '%c %e, %G, %k:%i:%s', 'UTC');
|
||||
2022-08-13 07:58:32
|
||||
|
@ -168,4 +168,23 @@ select parseDateTime('12 AM', '%h %p', 'UTC', 'a fourth argument'); -- { serverE
|
||||
-- Fuzzer crash bug #53715
|
||||
select parseDateTime('', '', toString(number)) from numbers(13); -- { serverError ILLEGAL_COLUMN }
|
||||
|
||||
-- %h
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32 PM', '%b %e, %G, %h:%i:%s %p', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME }
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32 PM', '%b %e, %G, %h:%i:%s %p', 'UTC');
|
||||
-- %l accepts single or double digits inputs
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32 PM', '%b %e, %G, %l:%i:%s %p', 'UTC');
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32 PM', '%b %e, %G, %l:%i:%s %p', 'UTC');
|
||||
-- %H
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32', '%b %e, %G, %H:%i:%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME }
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32', '%b %e, %G, %H:%i:%s', 'UTC');
|
||||
-- %k accepts single or double digits inputs
|
||||
select parseDateTime('Aug 13, 2022, 7:58:32', '%b %e, %G, %k:%i:%s', 'UTC');
|
||||
select parseDateTime('Aug 13, 2022, 07:58:32', '%b %e, %G, %k:%i:%s', 'UTC');
|
||||
-- %m
|
||||
select parseDateTime('8 13, 2022, 7:58:32', '%m %e, %G, %k:%i:%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME }
|
||||
select parseDateTime('08 13, 2022, 07:58:32', '%m %e, %G, %k:%i:%s', 'UTC');
|
||||
-- %c accepts single or double digits inputs
|
||||
select parseDateTime('8 13, 2022, 7:58:32', '%c %e, %G, %k:%i:%s', 'UTC');
|
||||
select parseDateTime('08 13, 2022, 07:58:32', '%c %e, %G, %k:%i:%s', 'UTC');
|
||||
|
||||
-- { echoOff }
|
||||
|
@ -1,124 +0,0 @@
|
||||
-- { echoOn }
|
||||
SET optimize_move_functions_out_of_any = 1;
|
||||
EXPLAIN QUERY TREE SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
QUERY id: 0
|
||||
PROJECTION COLUMNS
|
||||
any(plus(number, multiply(number, 2))) UInt64
|
||||
PROJECTION
|
||||
LIST id: 1, nodes: 1
|
||||
FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 2
|
||||
FUNCTION id: 4, function_name: any, function_type: aggregate, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 5, nodes: 1
|
||||
COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
|
||||
FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 9, nodes: 2
|
||||
FUNCTION id: 10, function_name: any, function_type: aggregate, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 11, nodes: 1
|
||||
COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
|
||||
CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
|
||||
JOIN TREE
|
||||
TABLE_FUNCTION id: 7, table_function_name: numbers
|
||||
ARGUMENTS
|
||||
LIST id: 13, nodes: 2
|
||||
CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8
|
||||
CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8
|
||||
SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
3
|
||||
EXPLAIN QUERY TREE SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
QUERY id: 0
|
||||
PROJECTION COLUMNS
|
||||
anyLast(plus(number, multiply(number, 2))) UInt64
|
||||
PROJECTION
|
||||
LIST id: 1, nodes: 1
|
||||
FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 2
|
||||
FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 5, nodes: 1
|
||||
COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
|
||||
FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 9, nodes: 2
|
||||
FUNCTION id: 10, function_name: anyLast, function_type: aggregate, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 11, nodes: 1
|
||||
COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
|
||||
CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
|
||||
JOIN TREE
|
||||
TABLE_FUNCTION id: 7, table_function_name: numbers
|
||||
ARGUMENTS
|
||||
LIST id: 13, nodes: 2
|
||||
CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8
|
||||
CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8
|
||||
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
6
|
||||
EXPLAIN QUERY TREE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
QUERY id: 0
|
||||
PROJECTION COLUMNS
|
||||
x UInt64
|
||||
PROJECTION
|
||||
LIST id: 1, nodes: 1
|
||||
FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 2
|
||||
FUNCTION id: 4, function_name: any, function_type: aggregate, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 5, nodes: 1
|
||||
COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
|
||||
CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
|
||||
JOIN TREE
|
||||
TABLE_FUNCTION id: 7, table_function_name: numbers
|
||||
ARGUMENTS
|
||||
LIST id: 9, nodes: 2
|
||||
CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
|
||||
CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
|
||||
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
3
|
||||
EXPLAIN QUERY TREE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
QUERY id: 0
|
||||
PROJECTION COLUMNS
|
||||
x UInt64
|
||||
x UInt64
|
||||
PROJECTION
|
||||
LIST id: 1, nodes: 2
|
||||
FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 2
|
||||
FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 5, nodes: 1
|
||||
COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
|
||||
CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
|
||||
FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 2
|
||||
FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
|
||||
ARGUMENTS
|
||||
LIST id: 5, nodes: 1
|
||||
COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
|
||||
CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
|
||||
JOIN TREE
|
||||
TABLE_FUNCTION id: 7, table_function_name: numbers
|
||||
ARGUMENTS
|
||||
LIST id: 9, nodes: 2
|
||||
CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
|
||||
CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
|
||||
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
6 6
|
||||
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
|
||||
SET optimize_move_functions_out_of_any = 0;
|
||||
SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
3
|
||||
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
6
|
||||
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
3
|
||||
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
6 6
|
||||
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
|
@ -1,33 +0,0 @@
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
-- { echoOn }
|
||||
SET optimize_move_functions_out_of_any = 1;
|
||||
|
||||
EXPLAIN QUERY TREE SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN QUERY TREE SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN QUERY TREE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
|
||||
EXPLAIN QUERY TREE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
|
||||
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
|
||||
|
||||
|
||||
|
||||
SET optimize_move_functions_out_of_any = 0;
|
||||
|
||||
SELECT any(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
SELECT anyLast(number + number * 2) FROM numbers(1, 2);
|
||||
|
||||
WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
|
||||
|
||||
SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
|
||||
|
||||
SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
|
||||
-- { echoOff }
|
@ -40,6 +40,7 @@ multiple_joins_rewriter_version
|
||||
odbc_max_field_size
|
||||
optimize_duplicate_order_by_and_distinct
|
||||
optimize_fuse_sum_count_avg
|
||||
optimize_move_functions_out_of_any
|
||||
parallel_replicas_min_number_of_granules_to_enable
|
||||
partial_merge_join_optimizations
|
||||
query_cache_store_results_of_queries_with_nondeterministic_functions
|
||||
|
@ -0,0 +1,13 @@
|
||||
value
|
||||
value1 value2
|
||||
value1 value1 value2
|
||||
NOT-FOUND-KEY is not in HTTP request headers
|
||||
FORBIDDEN-KEY1 is in get_client_http_header_forbidden_headers
|
||||
1 row1_value1 row1_value2 row1_value3 row1_value4 row1_value5 row1_value6 row1_value7
|
||||
2 row2_value1 row2_value2 row2_value3 row2_value4 row2_value5 row2_value6 row2_value7
|
||||
3
|
||||
value_from_query_1 value_from_query_2 value_from_query_3 1 row1_value1 row1_value2 row1_value3 row1_value4 row1_value5 row1_value6 row1_value7
|
||||
value_from_query_1 value_from_query_2 value_from_query_3 2 row2_value1 row2_value2 row2_value3 row2_value4 row2_value5 row2_value6 row2_value7
|
||||
value_from_query_1 value_from_query_2 value_from_query_3 3
|
||||
http_value1
|
||||
http_value2
|
tests/queries/0_stateless/02911_getHTTPHeaderFuncion.sh (Executable file, 75 lines)
@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
echo "SELECT getClientHTTPHeader('key')" | curl -s -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' -H 'key: value' 'http://localhost:8123/' -d @-
|
||||
|
||||
echo "SELECT getClientHTTPHeader('key1'), getClientHTTPHeader('key2')" | curl -s -H 'X-Clickhouse-User: default' \
|
||||
-H 'X-ClickHouse-Key: ' -H 'key1: value1' -H 'key2: value2' 'http://localhost:8123/' -d @-
|
||||
|
||||
echo "SELECT getClientHTTPHeader('test-' || 'key' || '-1'), getClientHTTPHeader('test-key-1'), getClientHTTPHeader('key2')" | curl -s -H 'X-Clickhouse-User: default' \
|
||||
-H 'X-ClickHouse-Key: ' -H 'test-key-1: value1' -H 'key2: value2' 'http://localhost:8123/' -d @-
|
||||
|
||||
#Code: 36. DB::Exception: NOT-FOUND-KEY is not in HTTP request headers
echo "SELECT getClientHTTPHeader('NOT-FOUND-KEY')"| curl -s -H 'X-Clickhouse-User: default' \
  -H 'X-ClickHouse-Key: ' -H 'key1: value1' -H 'key2: value2' 'http://localhost:8123/' -d @- | grep -o -e "NOT-FOUND-KEY is not in HTTP request headers"

#Code: 36. DB::Exception: The header FORBIDDEN-KEY1 is in get_client_http_header_forbidden_headers; you can configure this list in the config file.
echo "SELECT getClientHTTPHeader('FORBIDDEN-KEY1')" | curl -s -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' \
  -H 'FORBIDDEN-KEY1: forbbiden1' 'http://localhost:8123/' -d @- | grep -o -e "FORBIDDEN-KEY1 is in get_client_http_header_forbidden_headers"
|
||||
|
||||
db_name=${CLICKHOUSE_DATABASE}
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "CREATE DATABASE IF NOT EXISTS ${db_name};"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE ${db_name}.02884_get_http_header
|
||||
(id UInt32,
|
||||
http_key1 String DEFAULT getClientHTTPHeader('http_header_key1'),
|
||||
http_key2 String DEFAULT getClientHTTPHeader('http_header_key2'),
|
||||
http_key3 String DEFAULT getClientHTTPHeader('http_header_key3'),
|
||||
http_key4 String DEFAULT getClientHTTPHeader('http_header_key4'),
|
||||
http_key5 String DEFAULT getClientHTTPHeader('http_header_key5'),
|
||||
http_key6 String DEFAULT getClientHTTPHeader('http_header_key6'),
|
||||
http_key7 String DEFAULT getClientHTTPHeader('http_header_key7')
|
||||
)
|
||||
Engine=MergeTree()
|
||||
ORDER BY id"
|
||||
|
||||
#Insert data via http request
|
||||
echo "INSERT INTO ${db_name}.02884_get_http_header (id) values (1)" | curl -s -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' \
|
||||
-H 'http_header_key1: row1_value1'\
|
||||
-H 'http_header_key2: row1_value2'\
|
||||
-H 'http_header_key3: row1_value3'\
|
||||
-H 'http_header_key4: row1_value4'\
|
||||
-H 'http_header_key5: row1_value5'\
|
||||
-H 'http_header_key6: row1_value6'\
|
||||
-H 'http_header_key7: row1_value7' 'http://localhost:8123/' -d @-
|
||||
|
||||
echo "INSERT INTO ${db_name}.02884_get_http_header (id) values (2)" | curl -s -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' \
|
||||
-H 'http_header_key1: row2_value1'\
|
||||
-H 'http_header_key2: row2_value2'\
|
||||
-H 'http_header_key3: row2_value3'\
|
||||
-H 'http_header_key4: row2_value4'\
|
||||
-H 'http_header_key5: row2_value5'\
|
||||
-H 'http_header_key6: row2_value6'\
|
||||
-H 'http_header_key7: row2_value7' 'http://localhost:8123/' -d @-
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "SELECT id, http_key1, http_key2, http_key3, http_key4, http_key5, http_key6, http_key7 FROM ${db_name}.02884_get_http_header ORDER BY id;"
|
||||
#Insert data via tcp client
|
||||
$CLICKHOUSE_CLIENT --param_db="$db_name" -q "INSERT INTO ${db_name}.02884_get_http_header (id) values (3)"
|
||||
$CLICKHOUSE_CLIENT --param_db="$db_name" -q "SELECT * FROM ${db_name}.02884_get_http_header where id = 3"
|
||||
|
||||
echo "SELECT getClientHTTPHeader('key_from_query_1'), getClientHTTPHeader('key_from_query_2'), getClientHTTPHeader('key_from_query_3'), * FROM ${db_name}.02884_get_http_header ORDER BY id" | curl -s -H 'X-Clickhouse-User: default' \
|
||||
-H 'X-ClickHouse-Key: ' -H 'key_from_query_1: value_from_query_1' -H 'key_from_query_2: value_from_query_2' -H 'key_from_query_3: value_from_query_3' 'http://localhost:8123/' -d @-
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${db_name}.02884_get_http_header"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE IF NOT EXISTS ${db_name}.02884_header_from_table (header_name String) Engine=Memory"
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO ${db_name}.02884_header_from_table values ('http_key1'), ('http_key2')"
|
||||
|
||||
echo "SELECT getClientHTTPHeader(header_name) as value from (select * FROM ${db_name}.02884_header_from_table) order by value" | curl -s -H 'X-Clickhouse-User: default' \
|
||||
-H 'X-ClickHouse-Key: ' -H 'http_key1: http_value1' -H 'http_key2: http_value2' 'http://localhost:8123/' -d @-
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "DROP DATABASE ${db_name}"
|
@ -0,0 +1,152 @@
|
||||
{"QJC4GhRByEtEAjku":{}}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{"Cicktxh":true, "SpByjZKtr2VAyHCO":false}
|
||||
{"ClickHouse":"Is Fast", "VO7TCIkyu1akvN":{}}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ISlW1DB":"Is Fast", "5j4ATkq":{}}
|
||||
{"ClickHouse":false}
|
||||
{"ClickHouse":"Is Fast", "tRSz":13522460516091116060}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"CzTcYkQdSce":"Is Fast"}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ClickHouse":false}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ClickHouse":"Is Fast", "jql0YAY":[]}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ClickHouse":"Is Fast", "lF2vXus":false}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"ClickHouse":"Is Fast"}
|
||||
{"QJiGcwkonghk":"Is Fast"}
|
||||
{"sidetx":[{"name":"Alice"}, {"R6Vm":false}, {}], "SpByjZKtr2VAyHCO":false}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob"}]}
|
||||
{"students":[{"name":"Alice"}, {"name":true}]}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob"}]}
|
||||
{"ISuW1":[{"naYmS":"Alice", "hzTDYZQdScOct0RS":[]}, {"name":"Bob"}]}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob"}], "jql0YAY":[]}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob"}], "lF2vXus":false}
|
||||
{"students":[{"QJmGe":"Alice"}, {"name":"Bob"}]}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob"}]}
|
||||
{"kXtdet":[{"name":"Alice"}, {"name":"Bob"}]}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob"}], "Qcm4":{}}
|
||||
{"students":[{"name":"Alice"}, {"PmjG":"Bob"}]}
|
||||
{"students":[{"name":6128974479331836233}, {"name":"Bob"}]}
|
||||
{"sGudyet5u":[{"name":"Alice"}, {"name":"Bob"}, {}]}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob"}]}
|
||||
{"students":[{"Kamc":true}, {"name":"rKKN+5#NKEi-uf5U"}]}
|
||||
{"students":[{"name":"Alice"}, {"nPL6":1455900058404521160}]}
|
||||
{"students":[{"name":"Alice", "dzm5g9aPI21iIP9":[]}, {"name":"Bob"}]}
|
||||
{"students":[{"n4z4N":true, "uJrCh4ifo":{}}, {"name":"Bob", "kMnsl0BBFk":[]}], "kG21YiAcUKpcUS2":true}
|
||||
{"students":[{"name":"Alice"}, {"name":"Bob", "wQCN":{}}]}
|
||||
{"schedule":[{"breakfast":"7am", "5ZB35":{"nHypO":[]}}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"bdvelrflX":"7am", "5ZB35":{"nHypO":[]}}, {"23slh":"12pm"}]}
|
||||
{"tkdu8hl":[{"bdvelrflX":"7am", "5ZB35":{"nHypO":[]}}, {"23slh":"12pm"}]}
|
||||
{"tkdu8hl":[{"bdvelrflX":"7am", "5mkj5":{"nHypO":[]}}, {"23slh":"12pm"}], "n8HX5N6DVpBa":["fYOPSVVK*Brv_-AajZwT"]}
|
||||
{"tkdu8hl":[{"nQ4PePPfX":16091119822740071899, "5mkj5":{"npOE":[[]]}}, {"23slh":"12pm"}], "nHXa6BVq8E":["fYOPSVVK*Brv_-AajZwT"], "BHUNvB8sHk8ts6":true}
|
||||
{"tkdu8hl":[{"nQ4PePPfX":16091119822740071899, "5mkj5":{"G71D":[[], []]}}, {"23slh":"12pm"}], "FOIRaJ6VqVCKD0E":["fYOPSVVK*Brv_-AajZwT", 17244534201851710710], "BHUNvB8sHk8ts6":true, "qnk47QAn0yQ3ESEgO":true}
|
||||
{"tkdu8hl":[{"nQ4PePPfX":16091119822740071899, "5mkj5":{"G71D":[[], []]}}, {"23slh":"-plal2e"}], "FOIRaJ6VqVCKD0E":["fYOPSVVK*Brv_-AajZwT", 17244534201851710710], "BHUNvB8sHk8ts6":true, "qnk47QAn0yQ3ESEgO":true}
|
||||
{"tkdu8hl":[{"nQ4PePPfX":16091119822740071899, "5mkj5":{"Gpq7":[[], [false]]}, "YgbEtY":true}, {"23slh":false}], "FOIRaJ6VqVCKD0E":["fYOPSVVK*Brv_-AajZwT", 17244534201851710710], "ByRvBC4H0kgydJ":false, "zqokAQz8z0KnPOBrs8":true}
|
||||
{"kzcUZOl":[{"nQ4PePPfX":16091119822740071899, "Ekmj":{"lBKR":[[], [false], []], "dLc32r2f":{}}, "xbguW":"vGV&bitEteAH%-Eigg_7VlejYuHP"}, {"23slh":false}, {}], "FOIRaJ6VqVCKD0E":["fYOPSVVK*Brv_-AajZwT", 17244534201851710710], "ByRvBC4H0kgydJ":false, "zqokAQz8z0KnPOBrs8":true}
|
||||
{"kzcUZOl":[{"nQ4PePPfX":16091119822740071899, "Ekmj":{"lBKR":[[3774015142547830176], [false], []], "rCmVPvvf":{"wU6YWjag":[]}}, "xb7uW":"pWUTs&ikTCNRQt"}, {"23slh":false}, {}], "h3IK06PQGfCRQ":[false, false], "SyRRLBzEjy8YJ":false, "zqokAQz8z0KnPOBrs8":true}
|
||||
{"ukrzZl":[{"nQ4PePPfX":16091119822740071899, "5kmG":{"lBKR":[[14228925827882160318, "TpCrsW@11Io1sSu1@nFm"], [true], []], "rOmNvc":{"wU6YWjag":[], "pIK6tGXUp1gekWViJ":{}}, "igqgnb":[]}, "xb7uW":"pWUTs&ikTCNRQt", "jBT1ImcYb77bl2":true}, {"dsyf":true}, {}, {"qOElRhbehMXQNrln":{"PDoZa8OJHh1al59Ggq":{}}}], "h3IK06PQGfCRQ":[false, false], "SyRRLBzEjy8YJ":false, "zqokAQz8z0KnPOBrs8":true}
|
||||
{"ukrzZl":[{"nQ4PePPfX":16091119822740071899, "5kmG":{"lBKR":[[14228925827882160318, "TpCrsW@11Io1sSu1@nFm"], [true], []], "rOmNvc":{"wU6YWjag":[], "pIK6tGXUp1gekWViJ":{}}, "igqgnb":[]}, "xb7uW":"pWUTs&ikTCNRQt", "jBT1ImcYb77bl2":true}, {"dsyf":18233789955605096603}, {}, {"qOElRhbehMXQNrln":{"PoZngOHXMaWGRJq":{"QlnPi9zKoBtW2nGWB":"LgFazuGX*CuDy7X%4hkEmykg@6"}}}], "h3IK06PQGfCRQ":[false, false], "SyRRLBzEjy8YJ":false, "zQO8BA7nazqKW7CRP8":true}
|
||||
{"ukrzZl":[{"nQ4PePPfX":16091119822740071899, "5kmG":{"lBKR":[[16730631663303458403, "TpCrsW@11Io1sSu1@nFm"], [true], []], "rOmNvc":{"wU6YWjag":[false], "pIK6tGXUp1gekWViJ":{}}, "igqgnb":[]}, "xb7uW":"pWUTs&ikTCNRQt", "jBT1ImcYb77bl2":true}, {"dsyf":18233789955605096603, "mmCFLovnBThJPtpQG0Tv":false}, {}, {"qOElRhbehMXQNrln":{"PoZngOHXMaWGRJq":{"QlnPi9zKoBtW2nGWB":"LgFazuGX*CuDy7X%4hkEmykg@6"}}}, {"sx21nRmS69bXRo":[]}], "h3IK06PQGfCRQ":[false, "HjPw@G1Icu#dn"], "SyRRLBzEjy8YJ":false, "zQO8BA7nazqKW7CRP8":true}
|
||||
{"ukrzZl":[{"nQ4PePPfX":16091119822740071899, "5kmG":{"lBKR":[[16730631663303458403, "eiUmT%F$FQBWtWz^Tt7Ix&D"], [true], [], []], "rOmNvc":{"wOWxSWQf":[false], "pIK6tGXUp1gekWViJ":{}, "pFKIzg3HC":14538916875375166988}, "igqgnb":[]}, "xb7uW":"pWUTs&ikTCNRQt", "jlT1T35c27wbl2":true}, {"dsyf":18233789955605096603, "mYikENkiDhPRtQHOr":true}, {}, {"qOElRhbehMXQNrln":{"4GBqJBrnoOHJW5GA":{"QaPSqINbjb7nGx9qz":8975023301134451623, "JWOUP4WB1":14622543266409160782}}}, {"sx21nRmS69bXRo":[]}], "h3IK06PQGfCRQ":[false, "HjPw@G1Icu#dn"], "S1ncA0ERs8Y9v":"@7EShAFjSycp%Wo0gHn", "zQO8BA7nazqKW7CRP8":true}
|
||||
{"ukrzZl":[{"nQ4PePPfX":11197252787701758701, "5kmG":{"lBKR":[[16730631663303458403, "eiUmT%F$FQBWtWz^Tt7Ix&D", false], [true, true], [], []], "rOmNvc":{"wOWxSWQf":[false], "pIK6tGXUp1gekWViJ":{}, "pFKIzg3HC":14538916875375166988}, "igqgnb":[], "pUDeAJw":"MN^9hUPKv811Vq!"}, "oiU7x8":false, "jlT1T35c27wbl2":false}, {"dsyf":18233789955605096603, "mYikENkiDhPRtQHOr":true}, {}, {"qOElRhbehMXQNrln":{"4GBqJBrnoOHJW5GA":{"QaPSqINbjb7nGx9qz":8975023301134451623, "aOUaQBB":false}}}, {"x27uem04bX6R87b":[[]]}, {"MqSQ5v":[]}], "h3IK06PQGfCRQ":[false, "7pq+IfdiKeTkTym7AWjlc"], "S1ncA0ERs8Y9v":"@7EShAFjSycp%Wo0gHn", "zQO8BA7nazqKW7CRP8":true}
|
||||
{"UkPbWZl":[{"nQ4PePPfX":11197252787701758701, "5kmG":{"lBKR":[[16730631663303458403, "eiUmT%F$FQBWtWz^Tt7Ix&D", false], [true, true], [false], []], "rvCMyf":{"2pnWUuQ6J":[false, "q-5Gl5B8uOK"], "pIK6tGXUp1gekWViJ":{}, "pFKIzg3HC":14538916875375166988, "yeNIt3JgSC0K":1931793149388080066}, "BVH5PAgEe4b":[], "pUDeAJw":"LnJMn0D&2lr^k!A", "uDl68z":516601863564431352}, "oiU7x8":false, "jlT1T35c27wbl2":false}, {"dsyf":"F!*nU1V_WOni8$a9RXBHGob^sg", "mYikENkiDhPRtQHOr":true}, {}, {"qOURhbeBpKE8qrhC":{"4GBqJBrnoOHJW5GA":{"QaPSqINbjb7nGx9qz":8975023301134451623, "OUlR":false}}}, {"x27uem04bX6R87b":[[]]}, {"MqSQ5v":[]}], "h3IK06PQGfCRQ":[false, "7pq+IfdiKeTkTym7AWjlc", true], "dlCX4s8LF":"@7EShAFjSycp%Wo0gHn", "zQO8BA7nazqKW7CRP8":true, "XahaweEPjnHUyKsT":{}}
|
||||
{"IkkCdvbW8oLK":[{"nQ4PePPfX":11197252787701758701, "5kmG":{"lB3l":[[16730631663303458403, "eiUmT%F$FQBWtWz^Tt7Ix&D", 17822336972471685000], [true, true], [false], [], []], "rvCMyf":{"2pnWUuQ6J":[false, "q-5Gl5B8uOK"], "pIK6tGXUp1gekWViJ":{}, "pFKIzg3HC":14538916875375166988, "yeNIt3JgSC0K":1931793149388080066}, "BVH5PAgEe4b":[], "pUDeAJw":"LnJMn0D&2lr^k!A", "uDl68z":"fDT@hLdFJNXwBfJ__Fok7u2@BWY^t0"}, "oiU7x8":false, "jlT1T35c27wbl2":false}, {"dsyf":false, "mYikENkiDhPRtQHOr":true}, {}, {"qOURhbeBpKE8qrhC":{"7Qf27pQMkchIOBWX":{"QaPSqINbjb7nGx9qz":8975023301134451623, "OUlR":false, "EoEJ7GlbhI0":[]}}}, {"x27uem04bX6R87b":[[[]], []]}, {"MqSQ5v":[9304041946960766827]}, {}], "h3IK06PQGfCRQ":[false, "7pq+IfdiKeTkTym7AWjlc", true], "dlCX4s8LF":true, "zQO8BA7nazqKW7CRP8":true, "fOa5rfhNLCiqjrnUrtZ6":{}}
|
||||
{"IkkCdvbW8oLK":[{"nQ4PePPfX":11197252787701758701, "mGJx":{"lB3l":[[16730631663303458403, "eiUmT%F$FQBWtWz^Tt7Ix&D", 17822336972471685000], [true, true], [10370853850869029207], [], ["VaTduwAFH0ahN5xeJU"]], "rvCMyf":{"2pnWUuQ6J":[false, "6J%Orinf%4"], "pIK6tGXUp1gekWViJ":{}, "pFKIzg3HC":14538916875375166988, "yeNIt3JgSC0K":1931793149388080066}, "BVH5PAgEe4b":[], "pUDeAJw":"LnJMn0D&2lr^k!A", "uDl68z":"fDT@hLdFJNXwBfJ__Fok7u2@BWY^t0"}, "oiU7x8":false, "jlT1T35c27wbl2":false}, {"dsyf":false, "mYikENkiDhPRtQHOr":true}, {}, {"qOURhbeBpKE8qrhC":{"7Qf27pQMkchIOBWX":{"aKaShNyxj7Gx9qB":8975023301134451623, "OUlR":false, "EoEJ7GlbhI0":[]}}}, {"x27uem04bX6R87b":[[[]], []]}, {"MqSQ5v":[9304041946960766827, "T##LF8eDM"]}, {}], "h3IK06PQGfCRQ":[false, 6667769656296380039, true], "dlCX4s8LF":true, "zQO8BA7nazqKW7CRP8":true, "fOa5rfhNLCiqjrnUrtZ6":{}}
|
||||
{"IkkCdvbW8oLK":[{"nQ4PePPfX":11197252787701758701, "xGBZx":{"lB3l":[[16730631663303458403, "eiUmT%F$FQBWtWz^Tt7Ix&D", "sFwAP3"], [true, "-TBj_T1BS7OJh8^p1qO3!DK_X&CfwetZ"], [5795439407585677270, false], [], ["VaTduwAFH0ahN5xeJU"]], "OvMy":{"2pnWUuQ6J":[false, "6J%Orinf%4"], "wni3QGXfpgeq":{"QF0hiIqRIKp2mp04U":14287172497490584292}, "M8pg0INzhg3Hz":14538916875375166988, "yeNIt3JgSC0K":false, "TeFWw":[]}, "BVH5PAgEe4b":[], "pUDeAJw":"LnJMn0D&2lr^k!A", "uDl68z":"fDT@hLdFJNXwBfJ__Fok7u2@BWY^t0"}, "oiU7x8":false, "jlT1T35c27wbl2":false}, {"dsyf":false, "mYikENkiDhPRtQHOr":true}, {}, {"DjYSOeUFNepEK4XvC":{"7Qf27pQMkchIOBWX":{"aKaShNyxj7Gx9qB":8975023301134451623, "OUlR":false, "EoEJ7GlbhI0":[]}}}, {"x27uem04bX6R87b":[[[15632688604980432085]], []]}, {"MqSQ5v":[9304041946960766827, "T##LF8eDM"]}, {}], "h3IK06PQGfCRQ":[false, 6667769656296380039, true], "dlCX4s8LF":true, "zQO8BA7nazqKW7CRP8":true, "fOa5rfhNLCiqjrnUrtZ6":{}}
|
||||
{"IkkCdvbW8oLK":[{"nQ4PePPfX":11197252787701758701, "xGBZx":{"lB3l":[["_#JSXSLdVKXb+c", "eiUmT%F$FQBWtWz^Tt7Ix&D", "sFwAP3"], [true, "-TBj_T1BS7OJh8^p1qO3!DK_X&CfwetZ"], [5795439407585677270, false], [], ["VaTduwAFH0ahN5xeJU"]], "OvMy":{"2pnWUuQ6J":[false, "6J%Orinf%4"], "wni3QGXfpgeq":{"QF0hiIqRIKp2mp04U":14287172497490584292}, "M8pg0INzhg3Hz":14538916875375166988, "yeNIt3JgSC0K":false, "TeFWw":[]}, "BVH5PAgEe4b":[], "pUDeAJw":"LnJMn0D&2lr^k!A", "uDl68z":"8&VE7"}, "oiU7x8":false, "jlT1T35c27wbl2":false}, {"dsyf":false, "mYikENkiDhPRtQHOr":true, "lbci":{}}, {}, {"DjYSOeUFNepEK4XvC":{"QVEsjfQBcsIEbRWBW":{"uGYvt33UTmxj7t2B":8975023301134451623, "OUlR":false, "EoEJ7GlbhI0":[]}, "Qya8i":{"EMfurslq2KFOCa29od0d":[]}}}, {"x27uem04bX6R87b":[[[15632688604980432085]], [[]]]}, {"MqSQ5v":[9304041946960766827, "T##LF8eDM"]}, {}], "sEdwKHDRafKvC":[false, 6667769656296380039, true], "dlCX4s8LF":true, "zQO8BA7nazqKW7CRP8":true, "fOa5rfhNLCiqjrnUrtZ6":{}}
|
||||
{"schedule":[{"breakfast":"7am", "5ZB35":{"nHypO":[]}}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"bdvelrflX":"7am"}, {"lunch":"12pm"}]}
|
||||
{"23sldMp":[{"Ob8hrGkHsU8X":"7am"}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"bMnamkjsAsat":"7am"}, {"lunch":"12pm", "OfmJPaS":{}}]}
|
||||
{"snjTZul":[{"breakfast":"7am"}, {"lHkn6N":1318333088581732761}, {"bQH4jPs":{}}], "Hrv8ZL6":[]}
|
||||
{"schedule":[{"QrqaD":"!uUry9J-#VUCkKD0yyI+xM", "3e8EfNin":"0_Ny&1pcBzd8YEFq8hn4+Q#y^ESEg*"}, {"lunch":"12pm"}], "hGh8RR":{}}
|
||||
{"schedule":[{"regEsl2t":true, "q5flU9DI7erByRjh":{}}, {"lH0h":"%yJEbznodCJ8-#KzPNcBHrsr"}, {"pPk2zAcfUxDZcO":{}}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}, {}], "hZNsEeUmexM":{}}
|
||||
{"lhhG":[{"breakfast":"7am"}, {"lunch":"12pm", "OEgZYuhDWP3vGbV4bi":[]}, {}]}
|
||||
{"schedule":[{"breakfast":"kj*RPaKLng*&h4&UBqa-tw%53aE", "WtHnb8mVPvvHDUYWaJSB":[[]]}, {"lunch":"12pm"}], "6EigJgc8sxf7VIfMkDl":[]}
|
||||
{"schedule":[{"breakfast":false}, {"lunch":"12pm", "WikTL":1724418800345361559}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}
|
||||
{"h3hK0l":[{"breakfast":"7am", "fGNLfAC":{}}, {"lETzn6S":"12pm"}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"izEx":9011753325952200749}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"mY7la":17408441466865856756, "yIG0VqnoY1TTMjs":{"11BIo1csSuB1n":10038860187222625751}}]}
|
||||
{"cSJ8eOuN":[{"breakfast":"7am", "UgpWK":{"Wkha9tqdiOefZfAKQcEg":"EbhMQNrlngPo"}}, {"lunch":"12pm", "wGWGRJqJlPYzCB0":[]}, {}]}
|
||||
{"UBgFuue":[{"brrak2st":"kEmykg@6-%h-OQ@O_"}, {"lunch":"12pm", "7DnPaGPqi5Wr7":false}, {}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm", "LeH3":{}}]}
|
||||
{"schedule":[{"breakon":true}, {"Sx1Rch":9823913620251756169, "0TvaWJUmv0Cv":{}}]}
|
||||
{"schedule":[{"breakfast":"7am", "5ZB35":{"nHypO":[]}}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"bdvelrflX":"7am"}, {"lunch":"12pm"}]}
|
||||
{"23sldMp":[{"Ob8hrGkHsU8X":"7am"}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"bMnamkjsAsat":"7am"}, {"lunch":"12pm", "OfmJPaS":{}}]}
|
||||
{"snjTZul":[{"breakfast":"7am"}, {"lHkn6N":1318333088581732761}, {"bQH4jPs":{}}], "Hrv8ZL6":[]}
|
||||
{"schedule":[{"QrqaD":"!uUry9J-#VUCkKD0yyI+xM", "3e8EfNin":"0_Ny&1pcBzd8YEFq8hn4+Q#y^ESEg*"}, {"lunch":"12pm"}], "hGh8RR":{}}
|
||||
{"schedule":[{"regEsl2t":true, "q5flU9DI7erByRjh":{}}, {"lH0h":"%yJEbznodCJ8-#KzPNcBHrsr"}, {"pPk2zAcfUxDZcO":{}}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}, {}], "hZNsEeUmexM":{}}
|
||||
{"lhhG":[{"breakfast":"7am"}, {"lunch":"12pm", "OEgZYuhDWP3vGbV4bi":[]}, {}]}
|
||||
{"schedule":[{"breakfast":"kj*RPaKLng*&h4&UBqa-tw%53aE", "WtHnb8mVPvvHDUYWaJSB":[[]]}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"breakfast":false}, {"lunch":"12pm", "WikTL":1724418800345361559}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}
|
||||
{"h3hK0l":[{"breakfast":"7am", "fGNLfAC":{}}, {"lETzn6S":"12pm"}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"izEx":9011753325952200749}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"mY7la":17408441466865856756, "yIG0VqnoY1TTMjs":{"11BIo1csSuB1n":10038860187222625751}}]}
|
||||
{"cSJ8eOuN":[{"breakfast":"7am", "UgpWK":{"Wkha9tqdiOefZfAKQcEg":"EbhMQNrlngPo"}}, {"lunch":"12pm", "wGWGRJqJlPYzCB0":[]}, {}]}
|
||||
{"UBgFuue":[{"brrak2st":"kEmykg@6-%h-OQ@O_"}, {"lunch":"12pm", "7DnPaGPqi5Wr7":false}, {}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm", "LeH3":{}}]}
|
||||
{"schedule":[{"breakon":true}, {"Sx1Rch":9823913620251756169, "0TvaWJUmv0Cv":{}}]}
|
||||
{"schedule":[{"breakfast":"7am", "5ZB35":{"nHypO":[]}}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"bdvelrflX":"7am"}, {"lunch":"12pm"}]}
|
||||
{"23sldMp":[{"Ob8hrGkHsU8X":"7am"}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"bMnamkjsAsat":"7am"}, {"lunch":"12pm", "OfmJPaS":{}}]}
|
||||
{"snjTZul":[{"breakfast":"7am"}, {"lHkn6N":1318333088581732761}, {"bQH4jPs":{}}], "Hrv8ZL6":[]}
|
||||
{"schedule":[{"QrqaD":"!uUry9J-#VUCkKD0yyI+xM", "3e8EfNin":"0_Ny&1pcBzd8YEFq8hn4+Q#y^ESEg*"}, {"lunch":"12pm"}], "hGh8RR":{}}
|
||||
{"schedule":[{"regEsl2t":true, "q5flU9DI7erByRjh":{}}, {"lH0h":"%yJEbznodCJ8-#KzPNcBHrsr"}, {"pPk2zAcfUxDZcO":{}}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}, {}], "hZNsEeUmexM":{}}
|
||||
{"lhhG":[{"breakfast":"7am"}, {"lunch":"12pm", "OEgZYuhDWP3vGbV4bi":[]}, {}]}
|
||||
{"schedule":[{"breakfast":"kj*RPaKLng*&h4&UBqa-tw%53aE", "WtHnb8mVPvvHDUYWaJSB":[[]]}, {"lunch":"12pm"}], "6EigJgc8sxf7VIfMkDl":[]}
|
||||
{"schedule":[{"breakfast":false}, {"lunch":"12pm", "WikTL":1724418800345361559}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}
|
||||
{"h3hK0l":[{"breakfast":"7am", "fGNLfAC":{}}, {"lETzn6S":"12pm"}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"izEx":9011753325952200749}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"mY7la":17408441466865856756, "yIG0VqnoY1TTMjs":{"11BIo1csSuB1n":10038860187222625751}}]}
|
||||
{"cSJ8eOuN":[{"breakfast":"7am", "UgpWK":{"Wkha9tqdiOefZfAKQcEg":"EbhMQNrlngPo"}}, {"lunch":"12pm", "wGWGRJqJlPYzCB0":[]}, {}]}
|
||||
{"UBgFuue":[{"brrak2st":"kEmykg@6-%h-OQ@O_"}, {"lunch":"12pm", "7DnPaGPqi5Wr7":false}, {}]}
|
||||
{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm", "LeH3":{}}]}
|
||||
{"schedule":[{"breakon":true}, {"Sx1Rch":9823913620251756169, "0TvaWJUmv0Cv":{}}]}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{"cuNC":"j#Q*KbvL"}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{"e2mZBQPL9f0pgd0sXR":false}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
{}
|
||||
730
|
||||
200
|
106
tests/queries/0_stateless/02918_fuzzjson_table_function.sql
Normal file
@ -0,0 +1,106 @@
|
||||
-- Tags: no-parallel, no-replicated-database: Named collection is used
|
||||
|
||||
SET allow_experimental_object_type = 1;
|
||||
-- Basic fuzzJSON invocations and generator settings
|
||||
|
||||
DROP NAMED COLLECTION IF EXISTS 02918_json_fuzzer;
|
||||
CREATE NAMED COLLECTION 02918_json_fuzzer AS json_str='{}';
|
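||||
-- The named collection provides a default json_str ('{}'); the calls below override it
|
||||
-- and pass extra settings (a fixed random_seed keeps the reference output reproducible).
|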
||||
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, random_seed=54321) LIMIT 10;
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, json_str='{"ClickHouse":"Is Fast"}', random_seed=1337) LIMIT 20;
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, json_str='{"students":[{"name":"Alice"}, {"name":"Bob"}]}', random_seed=1337) LIMIT 20;
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, json_str='{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}', random_seed=123456, reuse_output=true) LIMIT 20;
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, json_str='{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}', random_seed=123456, reuse_output=false) LIMIT 20;
|
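||||
-- reuse_output=true presumably feeds each generated document back in as input for the
|
||||
-- next row, so the output drifts; reuse_output=false re-fuzzes the original json_str.
|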
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer,
|
||||
json_str='{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}',
|
||||
random_seed=123456,
|
||||
reuse_output=0,
|
||||
max_output_length=128) LIMIT 20;
|
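||||
-- max_output_length caps the serialized size of each generated document (in characters,
|
||||
-- presumably), preventing unbounded growth of the fuzzed output.
|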
||||
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer,
|
||||
json_str='{"schedule":[{"breakfast":"7am"}, {"lunch":"12pm"}]}',
|
||||
random_seed=123456,
|
||||
reuse_output=0,
|
||||
max_output_length=65536,
|
||||
max_nesting_level=10,
|
||||
max_array_size=20) LIMIT 20;
|
||||
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer,
|
||||
random_seed=6667,
|
||||
max_nesting_level=0) LIMIT 10;
|
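||||
-- With max_nesting_level=0 the fuzzer cannot descend into nested structures, so the
|
||||
-- generated documents stay flat (mostly bare '{}' with an occasional top-level key).
|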
||||
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer,
|
||||
random_seed=6667,
|
||||
max_object_size=0,
|
||||
max_array_size=0) LIMIT 10;
|
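||||
-- Capping both max_object_size and max_array_size at 0 leaves nothing to populate:
|
||||
-- every generated document degenerates to an empty '{}'.
|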
||||
|
||||
-- Inserting fuzzed documents into a String column
|
||||
DROP TABLE IF EXISTS 02918_table_str;
|
||||
CREATE TABLE 02918_table_str (json_str String) Engine=Memory;
|
||||
|
||||
INSERT INTO 02918_table_str SELECT * FROM fuzzJSON(02918_json_fuzzer) LIMIT 10;
|
||||
INSERT INTO 02918_table_str SELECT * FROM fuzzJSON(02918_json_fuzzer) LIMIT 10;
|
||||
INSERT INTO 02918_table_str SELECT * FROM fuzzJSON(02918_json_fuzzer, random_seed=123, reuse_output=true) LIMIT 10;
|
||||
INSERT INTO 02918_table_str SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str='{"name": "John Doe", "age": 30, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
|
||||
random_seed=6666) LIMIT 200;
|
||||
|
||||
INSERT INTO 02918_table_str SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str='{"name": "John Doe", "age": 30, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
|
||||
random_seed=6666,
|
||||
min_key_length=1,
|
||||
max_key_length=5) LIMIT 200;
|
||||
|
||||
INSERT INTO 02918_table_str SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str='{"name": "John Doe", "age": 30, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
|
||||
max_nesting_level=128,
|
||||
reuse_output=true,
|
||||
random_seed=6666,
|
||||
min_key_length=5,
|
||||
max_key_length=5) LIMIT 200;
|
||||
|
||||
INSERT INTO 02918_table_str SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str='{"name": "John Doe", "age": 30, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
|
||||
random_seed=6666,
|
||||
reuse_output=1,
|
||||
probability=0.5,
|
||||
max_output_length=65536,
|
||||
max_nesting_level=18446744073709551615,
|
||||
max_array_size=18446744073709551615,
|
||||
max_object_size=18446744073709551615,
|
||||
max_key_length=65536,
|
||||
max_string_value_length=65536) LIMIT 100;
|
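||||
-- 18446744073709551615 is the UInt64 maximum, so this run exercises the extreme upper
|
||||
-- bounds of the size limits together with a 0.5 fuzzing probability.
|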
||||
|
||||
SELECT count() FROM 02918_table_str;
|
||||
|
||||
DROP TABLE IF EXISTS 02918_table_str;
|
||||
|
||||
-- Invalid arguments must be rejected
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_output_length="Hello") LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_output_length=65537) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, probability=10) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, probability=-0.1) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, probability=1.1) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_string_value_length=65537) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=65537) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=0) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=11) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
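||||
-- Taken together, the checks above imply the accepted ranges: probability in [0, 1],
|
||||
-- the *_length limits at most 65536, and 1 <= min_key_length <= max_key_length.
|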
||||
|
||||
-- Inserting fuzzed documents into an Object('json') column
|
||||
DROP TABLE IF EXISTS 02918_table_obj;
|
||||
CREATE TABLE 02918_table_obj (json_obj Object('json')) Engine=Memory;
|
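||||
-- Object('json') is why allow_experimental_object_type=1 is set at the top; the fuzzer
|
||||
-- output must remain valid JSON for these inserts to parse.
|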
||||
|
||||
INSERT INTO 02918_table_obj SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str='{"name": "John Doe", "age": 27, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
|
||||
random_seed=12345) LIMIT 200;
|
||||
SELECT count() FROM 02918_table_obj;
|
||||
|
||||
DROP TABLE IF EXISTS 02918_table_obj;
|
||||
|
||||
DROP NAMED COLLECTION IF EXISTS 02918_json_fuzzer;
|
@ -0,0 +1,3 @@
|
||||
0 0 false
|
||||
1 1 true
|
||||
0 0 false
|
@ -0,0 +1,18 @@
|
||||
DROP TABLE IF EXISTS crash_02919;
|
||||
|
||||
CREATE TABLE crash_02919 (
|
||||
b Int64,
|
||||
c Nullable(Int64) MATERIALIZED b,
|
||||
d Nullable(Bool) MATERIALIZED b
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY tuple();
|
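||||
-- c and d are MATERIALIZED from b, so mutating b forces them to be recomputed;
|
||||
-- the table name suggests this is a regression test for a crash in that path.
|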
||||
|
||||
INSERT INTO crash_02919 VALUES (0);
|
||||
SELECT b, c, d FROM crash_02919;
|
||||
ALTER TABLE crash_02919 UPDATE b = 1 WHERE 1=1 SETTINGS mutations_sync = 1;
|
||||
SELECT b, c, d FROM crash_02919;
|
||||
ALTER TABLE crash_02919 UPDATE b = 0.1 WHERE 1=1 SETTINGS mutations_sync = 1;
|
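||||
-- 0.1 is cast to Int64 and truncates to 0, so the final SELECT returns '0 0 false' again.
|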
||||
SELECT b, c, d FROM crash_02919;
|
||||
|
||||
DROP TABLE crash_02919;
|
Some files were not shown because too many files have changed in this diff