Merge branch 'master' into new-nav

Rich Raposa 2023-03-09 08:12:23 -07:00 committed by GitHub
commit 66dd5b3834
45 changed files with 706 additions and 108 deletions

View File

@ -115,6 +115,13 @@ configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/A
# ARROW_ORC + adapters/orc/CMakefiles
set(ORC_SRCS
"${CMAKE_CURRENT_BINARY_DIR}/orc_proto.pb.h"
"${ORC_SOURCE_SRC_DIR}/sargs/ExpressionTree.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/Literal.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/PredicateLeaf.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/SargsApplier.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/SearchArgument.cc"
"${ORC_SOURCE_SRC_DIR}/sargs/TruthValue.cc"
"${ORC_SOURCE_SRC_DIR}/Exceptions.cc"
"${ORC_SOURCE_SRC_DIR}/OrcFile.cc"
"${ORC_SOURCE_SRC_DIR}/Reader.cc"
@ -129,13 +136,20 @@ set(ORC_SRCS
"${ORC_SOURCE_SRC_DIR}/MemoryPool.cc"
"${ORC_SOURCE_SRC_DIR}/RLE.cc"
"${ORC_SOURCE_SRC_DIR}/RLEv1.cc"
"${ORC_SOURCE_SRC_DIR}/RLEv2.cc"
"${ORC_SOURCE_SRC_DIR}/RleDecoderV2.cc"
"${ORC_SOURCE_SRC_DIR}/RleEncoderV2.cc"
"${ORC_SOURCE_SRC_DIR}/RLEV2Util.cc"
"${ORC_SOURCE_SRC_DIR}/Statistics.cc"
"${ORC_SOURCE_SRC_DIR}/StripeStream.cc"
"${ORC_SOURCE_SRC_DIR}/Timezone.cc"
"${ORC_SOURCE_SRC_DIR}/TypeImpl.cc"
"${ORC_SOURCE_SRC_DIR}/Vector.cc"
"${ORC_SOURCE_SRC_DIR}/Writer.cc"
"${ORC_SOURCE_SRC_DIR}/Adaptor.cc"
"${ORC_SOURCE_SRC_DIR}/BloomFilter.cc"
"${ORC_SOURCE_SRC_DIR}/Murmur3.cc"
"${ORC_SOURCE_SRC_DIR}/BlockBuffer.cc"
"${ORC_SOURCE_SRC_DIR}/wrap/orc-proto-wrapper.cc"
"${ORC_SOURCE_SRC_DIR}/io/InputStream.cc"
"${ORC_SOURCE_SRC_DIR}/io/OutputStream.cc"
"${ORC_ADDITION_SOURCE_DIR}/orc_proto.pb.cc"
@ -358,6 +372,9 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_BROTLI)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})
add_library(_arrow ${ARROW_SRCS})
@ -372,6 +389,7 @@ target_link_libraries(_arrow PRIVATE
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd
ch_contrib::brotli
)
target_link_libraries(_arrow PUBLIC _orc)

2
contrib/orc vendored

@ -1 +1 @@
Subproject commit f9a393ed2433a60034795284f82d093b348f2102
Subproject commit c5d7755ba0b9a95631c8daea4d094101f26ec761

View File

@ -161,7 +161,9 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Authentication failed" \
-e "Cannot flush" \
-e "Container already exists" \
/var/log/clickhouse-server/clickhouse-server.upgrade.log | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
clickhouse-server.upgrade.log \
| grep -av -e "_repl_01111_.*Mapping for table with UUID" \
| zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \
&& echo -e "Error message in clickhouse-server.log (see upgrade_error_messages.txt)$FAIL$(head_escaped /test_output/upgrade_error_messages.txt)" \
>> /test_output/test_results.tsv \
|| echo -e "No Error messages after server upgrade$OK" >> /test_output/test_results.tsv

File diff suppressed because one or more lines are too long

View File

@ -1981,6 +1981,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
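For illustration only (this example is not part of the changed page; `{some_table}` and `{filename.parquet}` are placeholders in the same style used elsewhere in this document), the new compression setting can be applied per query:

$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet SETTINGS output_format_parquet_compression_method='zstd'" > {filename.parquet}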
## Arrow {#data-format-arrow}
@ -2051,6 +2052,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
- [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`.
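As an illustrative sketch (not from the changed page; placeholders follow the document's own convention), the Arrow codec can likewise be chosen per query:

$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow SETTINGS output_format_arrow_compression_method='zstd'" > {filename.arrow}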
## ArrowStream {#data-format-arrow-stream}
@ -2107,6 +2109,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.
### Arrow format settings {#parquet-format-settings}
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`.
- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.

View File

@ -1014,6 +1014,12 @@ Use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString column
Enabled by default.
### output_format_arrow_compression_method {#output_format_arrow_compression_method}
Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed)
Default value: `none`.
## ORC format settings {#orc-format-settings}
### input_format_orc_import_nested {#input_format_orc_import_nested}
@ -1057,6 +1063,12 @@ Use ORC String type instead of Binary for String columns.
Disabled by default.
### output_format_orc_compression_method {#output_format_orc_compression_method}
Compression method used in output ORC format. Supported codecs: `lz4`, `snappy`, `zlib`, `zstd`, `none` (uncompressed)
Default value: `none`.
## Parquet format settings {#parquet-format-settings}
### input_format_parquet_import_nested {#input_format_parquet_import_nested}
@ -1112,6 +1124,12 @@ The version of Parquet format used in output format. Supported versions: `1.0`,
Default value: `2.latest`.
### output_format_parquet_compression_method {#output_format_parquet_compression_method}
Compression method used in output Parquet format. Supported codecs: `snappy`, `lz4`, `brotli`, `zstd`, `gzip`, `none` (uncompressed)
Default value: `snappy`.
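A minimal usage sketch for the three new compression settings described above (the table name is hypothetical; the `FORMAT ... SETTINGS ...` form matches the tests later in this diff):

SELECT * FROM some_table FORMAT Parquet SETTINGS output_format_parquet_compression_method = 'zstd';
SELECT * FROM some_table FORMAT ORC SETTINGS output_format_orc_compression_method = 'zlib';
SELECT * FROM some_table FORMAT Arrow SETTINGS output_format_arrow_compression_method = 'lz4_frame';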
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -70,6 +70,12 @@ A materialized view is implemented as follows: when inserting data to the table
Materialized views in ClickHouse use **column names** instead of column order during insertion into destination table. If some column names are not present in the `SELECT` query result, ClickHouse uses a default value, even if the column is not [Nullable](../../data-types/nullable.md). A safe practice would be to add aliases for every column when using Materialized views.
Materialized views in ClickHouse are implemented more like insert triggers. If there's some aggregation in the view query, it's applied only to the batch of freshly inserted data. Any changes to existing data of the source table (like update, delete, drop partition, etc.) do not change the materialized view.
Materialized views in ClickHouse do not have deterministic behaviour in case of errors. This means that blocks that had already been written will be preserved in the destination table, but all blocks after the error will not.
By default, if pushing to one of the views fails, the INSERT query fails too, and some blocks may not be written to the destination table. This can be changed with the `materialized_views_ignore_errors` setting (set it for the `INSERT` query): with `materialized_views_ignore_errors=true`, any errors while pushing to views are ignored and all blocks are written to the destination table.
Also note that `materialized_views_ignore_errors` is set to `true` by default for `system.*_log` tables.
:::
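A minimal sketch of the setting in action (the table name `data` and the failing view are hypothetical, not from the original page):

-- By default the INSERT fails if any attached materialized view throws:
INSERT INTO data VALUES (1);
-- With the setting, view errors are ignored and the block is still written to `data`
-- (the errors are recorded in system.query_views_log):
INSERT INTO data SETTINGS materialized_views_ignore_errors = 1 VALUES (2);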
If you specify `POPULATE`, the existing table data is inserted into the view when creating it, as if making a `CREATE TABLE ... AS SELECT ...` . Otherwise, the query contains only the data inserted in the table after creating the view. We **do not recommend** using `POPULATE`, since data inserted in the table during the view creation will not be inserted in it.

View File

@ -1867,8 +1867,8 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
String query;
{
WriteBufferFromOwnString wb;
wb << "SELECT DISTINCT " << partition_name << " AS partition FROM"
<< " " << getQuotedTable(task_shard.table_read_shard) << " ORDER BY partition DESC";
wb << "SELECT " << partition_name << " AS partition FROM "
<< getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC";
query = wb.str();
}

View File

@ -355,21 +355,67 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
if (select_limit_by)
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context);
/// Combine limit expression with limit setting
/// Combine limit expression with limit and offset settings into final limit expression
/// The sequence of application is the following - offset expression, limit expression, offset setting, limit setting.
/// Since the offset setting is applied after the limit expression, but we want to fold the settings into the expressions,
/// we must decrease the limit expression by the offset setting and then add the offset setting to the offset expression.
/// select_limit - limit expression
/// limit - limit setting
/// offset - offset setting
///
/// if select_limit
/// -- if offset >= select_limit (expr 0)
/// then (0) (0 rows)
/// -- else if limit > 0 (expr 1)
/// then min(select_limit - offset, limit) (expr 2)
/// -- else
/// then (select_limit - offset) (expr 3)
/// else if limit > 0
/// then limit
///
/// offset = offset + offset expression
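/// Example (matches the limit/offset test later in this diff): for the query
/// `SELECT * FROM test LIMIT 50 OFFSET 185` with settings limit = 10 and offset = 10:
/// offset (10) < select_limit (50) and limit (10) > 0, so the final limit is
/// least(50 - 10, 10) = 10 (expr 2) and the final offset is 185 + 10 = 195;
/// on a 200-row table this returns 5 rows.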
auto select_limit = select_query_typed.limitLength();
if (select_limit && limit)
if (select_limit)
{
auto function_node = std::make_shared<FunctionNode>("least");
function_node->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
current_query_tree->getLimit() = std::move(function_node);
}
else if (limit)
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
else if (select_limit)
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
/// Shortcut
if (offset == 0 && limit == 0)
{
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
}
else
{
/// expr 3
auto expr_3 = std::make_shared<FunctionNode>("minus");
expr_3->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
expr_3->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
/// Combine offset expression with offset setting
/// expr 2
auto expr_2 = std::make_shared<FunctionNode>("least");
expr_2->getArguments().getNodes().push_back(expr_3->clone());
expr_2->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(limit));
/// expr 0
auto expr_0 = std::make_shared<FunctionNode>("greaterOrEquals");
expr_0->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(offset));
expr_0->getArguments().getNodes().push_back(buildExpression(select_limit, current_context));
/// expr 1
auto expr_1 = std::make_shared<ConstantNode>(limit > 0);
auto function_node = std::make_shared<FunctionNode>("multiIf");
function_node->getArguments().getNodes().push_back(expr_0);
function_node->getArguments().getNodes().push_back(std::make_shared<ConstantNode>(0));
function_node->getArguments().getNodes().push_back(expr_1);
function_node->getArguments().getNodes().push_back(expr_2);
function_node->getArguments().getNodes().push_back(expr_3);
current_query_tree->getLimit() = std::move(function_node);
}
}
else if (limit > 0)
current_query_tree->getLimit() = std::make_shared<ConstantNode>(limit);
/// Combine offset expression with offset setting into final offset expression
auto select_offset = select_query_typed.limitOffset();
if (select_offset && offset)
{

View File

@ -516,6 +516,7 @@ class IColumn;
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
@ -864,6 +865,7 @@ class IColumn;
M(Bool, output_format_parquet_string_as_string, false, "Use Parquet String type instead of Binary for String columns.", 0) \
M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \
M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \
M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \
M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \
@ -906,8 +908,10 @@ class IColumn;
M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \
M(Bool, output_format_arrow_string_as_string, false, "Use Arrow String type instead of Binary for String columns", 0) \
M(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, "Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns.", 0) \
M(ArrowCompression, output_format_arrow_compression_method, "lz4_frame", "Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed)", 0) \
\
M(Bool, output_format_orc_string_as_string, false, "Use ORC String type instead of Binary for String columns", 0) \
M(ORCCompression, output_format_orc_compression_method, "lz4", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \
\
M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \
\

View File

@ -81,7 +81,10 @@ namespace SettingsChangesHistory
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"},
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}},
{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"},
{"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"},
{"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"},
{"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}}},
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},

View File

@ -158,7 +158,7 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS,
{"XML", FormatSettings::EscapingRule::XML},
{"Raw", FormatSettings::EscapingRule::Raw}})
IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS,
{{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN},
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
@ -176,11 +176,30 @@ IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS,
{"pread", LocalFSReadMethod::pread},
{"read", LocalFSReadMethod::read}})
IMPLEMENT_SETTING_ENUM_WITH_RENAME(ParquetVersion, ErrorCodes::BAD_ARGUMENTS,
{{"1.0", FormatSettings::ParquetVersion::V1_0},
{"2.4", FormatSettings::ParquetVersion::V2_4},
{"2.6", FormatSettings::ParquetVersion::V2_6},
{"2.latest", FormatSettings::ParquetVersion::V2_LATEST}})
IMPLEMENT_SETTING_ENUM(ParquetCompression, ErrorCodes::BAD_ARGUMENTS,
{{"none", FormatSettings::ParquetCompression::NONE},
{"snappy", FormatSettings::ParquetCompression::SNAPPY},
{"zstd", FormatSettings::ParquetCompression::ZSTD},
{"gzip", FormatSettings::ParquetCompression::GZIP},
{"lz4", FormatSettings::ParquetCompression::LZ4},
{"brotli", FormatSettings::ParquetCompression::BROTLI}})
IMPLEMENT_SETTING_ENUM(ArrowCompression, ErrorCodes::BAD_ARGUMENTS,
{{"none", FormatSettings::ArrowCompression::NONE},
{"lz4_frame", FormatSettings::ArrowCompression::LZ4_FRAME},
{"zstd", FormatSettings::ArrowCompression::ZSTD}})
IMPLEMENT_SETTING_ENUM(ORCCompression, ErrorCodes::BAD_ARGUMENTS,
{{"none", FormatSettings::ORCCompression::NONE},
{"snappy", FormatSettings::ORCCompression::SNAPPY},
{"zstd", FormatSettings::ORCCompression::ZSTD},
{"zlib", FormatSettings::ORCCompression::ZLIB},
{"lz4", FormatSettings::ORCCompression::LZ4}})
}

View File

@ -194,6 +194,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
DECLARE_SETTING_ENUM_WITH_RENAME(ParquetCompression, FormatSettings::ParquetCompression)
DECLARE_SETTING_ENUM_WITH_RENAME(ArrowCompression, FormatSettings::ArrowCompression)
DECLARE_SETTING_ENUM_WITH_RENAME(ORCCompression, FormatSettings::ORCCompression)
enum class Dialect
{
clickhouse,

View File

@ -118,6 +118,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array;
format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size;
format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.color = settings.output_format_pretty_color;
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
@ -158,6 +159,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
format_settings.arrow.output_string_as_string = settings.output_format_arrow_string_as_string;
format_settings.arrow.output_fixed_string_as_fixed_byte_array = settings.output_format_arrow_fixed_string_as_fixed_byte_array;
format_settings.arrow.output_compression_method = settings.output_format_arrow_compression_method;
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
@ -168,6 +170,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string;
format_settings.orc.output_compression_method = settings.output_format_orc_compression_method;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference;

View File

@ -86,6 +86,13 @@ struct FormatSettings
UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH;
enum class ArrowCompression
{
NONE,
LZ4_FRAME,
ZSTD
};
struct
{
UInt64 row_group_size = 1000000;
@ -96,6 +103,7 @@ struct FormatSettings
bool case_insensitive_column_matching = false;
bool output_string_as_string = false;
bool output_fixed_string_as_fixed_byte_array = true;
ArrowCompression output_compression_method = ArrowCompression::NONE;
} arrow;
struct
@ -183,6 +191,16 @@ struct FormatSettings
V2_LATEST,
};
enum class ParquetCompression
{
NONE,
SNAPPY,
ZSTD,
LZ4,
GZIP,
BROTLI,
};
struct
{
UInt64 row_group_size = 1000000;
@ -195,6 +213,7 @@ struct FormatSettings
bool output_fixed_string_as_fixed_byte_array = true;
UInt64 max_block_size = 8192;
ParquetVersion output_version;
ParquetCompression output_compression_method = ParquetCompression::SNAPPY;
} parquet;
struct Pretty
@ -276,6 +295,15 @@ struct FormatSettings
bool accurate_types_of_literals = true;
} values;
enum class ORCCompression
{
NONE,
LZ4,
SNAPPY,
ZSTD,
ZLIB,
};
struct
{
bool import_nested = false;
@ -285,6 +313,7 @@ struct FormatSettings
bool case_insensitive_column_matching = false;
std::unordered_set<int> skip_stripes = {};
bool output_string_as_string = false;
ORCCompression output_compression_method = ORCCompression::NONE;
} orc;
/// For capnProto format we should determine how to

View File

@ -70,7 +70,7 @@ private:
if (!has_prev_value)
{
dst[i] = is_first_line_zero ? 0 : src[i];
dst[i] = is_first_line_zero ? static_cast<Dst>(0) : static_cast<Dst>(src[i]);
prev = src[i];
has_prev_value = true;
}
@ -102,6 +102,10 @@ private:
f(UInt32());
else if (which.isUInt64())
f(UInt64());
else if (which.isUInt128())
f(UInt128());
else if (which.isUInt256())
f(UInt256());
else if (which.isInt8())
f(Int8());
else if (which.isInt16())
@ -110,6 +114,10 @@ private:
f(Int32());
else if (which.isInt64())
f(Int64());
else if (which.isInt128())
f(Int128());
else if (which.isInt256())
f(Int256());
else if (which.isFloat32())
f(Float32());
else if (which.isFloat64())

View File

@ -169,11 +169,11 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
return {};
}
auto write_error_status = [&](const String & host_id, const String & error_message, const String & reason)
auto write_error_status = [&](const String & host_id, const ExecutionStatus & status, const String & reason)
{
LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, error_message);
LOG_ERROR(log, "Cannot parse DDL task {}: {}. Will try to send error status: {}", entry_name, reason, status.message);
createStatusDirs(entry_path, zookeeper);
zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, error_message, zkutil::CreateMode::Persistent);
zookeeper->tryCreate(fs::path(entry_path) / "finished" / host_id, status.serializeText(), zkutil::CreateMode::Persistent);
};
try
@ -187,7 +187,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
/// We can try to create the fail node using the FQDN: if it is equal to the host name in the cluster config, the attempt will be successful.
/// Otherwise, that node will be ignored by DDLQueryStatusSource.
out_reason = "Incorrect task format";
write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
write_error_status(host_fqdn_id, ExecutionStatus::fromCurrentException(), out_reason);
return {};
}
@ -212,7 +212,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
catch (...)
{
out_reason = "Cannot parse query or obtain cluster info";
write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException().serializeText(), out_reason);
write_error_status(task->host_id_str, ExecutionStatus::fromCurrentException(), out_reason);
return {};
}
@ -650,7 +650,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
bool status_written_by_table_or_db = task.ops.empty();
if (status_written_by_table_or_db)
{
throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.serializeText());
throw Exception(ErrorCodes::UNFINISHED, "Unexpected error: {}", task.execution_status.message);
}
else
{

View File

@ -426,6 +426,8 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
// we need query context to do inserts to target table with MV containing subqueries or joins
auto insert_context = Context::createCopy(context);
insert_context->makeQueryContext();
/// We always want to deliver the data to the original table regardless of the MVs
insert_context->setSetting("materialized_views_ignore_errors", true);
InterpreterInsertQuery interpreter(query_ptr, insert_context);
BlockIO io = interpreter.execute();

View File

@ -451,10 +451,24 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
/// Avoid early destruction of process_list_entry if it was not saved to `res` yet (in case of exception)
ProcessList::EntryPtr process_list_entry;
BlockIO res;
std::shared_ptr<InterpreterTransactionControlQuery> implicit_txn_control{};
auto implicit_txn_control = std::make_shared<bool>(false);
String query_database;
String query_table;
auto execute_implicit_tcl_query = [implicit_txn_control](const ContextMutablePtr & query_context, ASTTransactionControl::QueryType tcl_type)
{
/// Unset the flag on COMMIT and ROLLBACK
SCOPE_EXIT({ if (tcl_type != ASTTransactionControl::BEGIN) *implicit_txn_control = false; });
ASTPtr tcl_ast = std::make_shared<ASTTransactionControl>(tcl_type);
InterpreterTransactionControlQuery tc(tcl_ast, query_context);
tc.execute();
/// Set the flag after a successful BEGIN
if (tcl_type == ASTTransactionControl::BEGIN)
*implicit_txn_control = true;
};
try
{
if (auto txn = context->getCurrentTransaction())
@ -674,14 +688,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
if (context->isGlobalContext())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot create transactions");
/// If there is no session (which is the default for the HTTP Handler), set up one just for this as it is necessary
/// to control the transaction lifetime
if (!context->hasSessionContext())
context->makeSessionContext();
auto tc = std::make_shared<InterpreterTransactionControlQuery>(ast, context);
tc->executeBegin(context->getSessionContext());
implicit_txn_control = std::move(tc);
execute_implicit_tcl_query(context, ASTTransactionControl::BEGIN);
}
catch (Exception & e)
{
@ -949,6 +956,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
log_processors_profiles = settings.log_processors_profiles,
status_info_to_query_log,
implicit_txn_control,
execute_implicit_tcl_query,
pulling_pipeline = pipeline.pulling(),
query_span](QueryPipeline & query_pipeline) mutable
{
@ -1062,21 +1070,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
}
if (implicit_txn_control)
{
try
{
implicit_txn_control->executeCommit(context->getSessionContext());
implicit_txn_control.reset();
}
catch (const Exception &)
{
/// An exception might happen when trying to commit the transaction. For example we might get an immediate exception
/// because ZK is down and wait_changes_become_visible_after_commit_mode == WAIT_UNKNOWN
implicit_txn_control.reset();
throw;
}
}
if (*implicit_txn_control)
execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT);
}
if (query_span)
@ -1104,13 +1099,11 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
quota(quota),
status_info_to_query_log,
implicit_txn_control,
execute_implicit_tcl_query,
query_span](bool log_error) mutable
{
if (implicit_txn_control)
{
implicit_txn_control->executeRollback(context->getSessionContext());
implicit_txn_control.reset();
}
if (*implicit_txn_control)
execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK);
else if (auto txn = context->getCurrentTransaction())
txn->onException();
@ -1179,15 +1172,10 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
catch (...)
{
if (implicit_txn_control)
{
implicit_txn_control->executeRollback(context->getSessionContext());
implicit_txn_control.reset();
}
if (*implicit_txn_control)
execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK);
else if (auto txn = context->getCurrentTransaction())
{
txn->onException();
}
if (!internal)
onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds());

View File

@ -17,6 +17,24 @@ namespace ErrorCodes
extern const int UNKNOWN_EXCEPTION;
}
namespace
{
arrow::Compression::type getArrowCompression(FormatSettings::ArrowCompression method)
{
switch (method)
{
case FormatSettings::ArrowCompression::NONE:
return arrow::Compression::type::UNCOMPRESSED;
case FormatSettings::ArrowCompression::ZSTD:
return arrow::Compression::type::ZSTD;
case FormatSettings::ArrowCompression::LZ4_FRAME:
return arrow::Compression::type::LZ4_FRAME;
}
}
}
ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, bool stream_, const FormatSettings & format_settings_)
: IOutputFormat(header_, out_)
, stream{stream_}
@ -78,12 +96,14 @@ void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr<arrow::Schema>
{
arrow_ostream = std::make_shared<ArrowBufferedOutputStream>(out);
arrow::Result<std::shared_ptr<arrow::ipc::RecordBatchWriter>> writer_status;
arrow::ipc::IpcWriteOptions options = arrow::ipc::IpcWriteOptions::Defaults();
options.codec = *arrow::util::Codec::Create(getArrowCompression(format_settings.arrow.output_compression_method));
// TODO: should we use arrow::ipc::IpcOptions::alignment?
if (stream)
writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema);
writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema, options);
else
writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema);
writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema, options);
if (!writer_status.ok())
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,

View File

@ -28,6 +28,34 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NOT_IMPLEMENTED;
}
namespace
{
orc::CompressionKind getORCCompression(FormatSettings::ORCCompression method)
{
if (method == FormatSettings::ORCCompression::NONE)
return orc::CompressionKind::CompressionKind_NONE;
#if USE_SNAPPY
if (method == FormatSettings::ORCCompression::SNAPPY)
return orc::CompressionKind::CompressionKind_SNAPPY;
#endif
if (method == FormatSettings::ORCCompression::ZSTD)
return orc::CompressionKind::CompressionKind_ZSTD;
if (method == FormatSettings::ORCCompression::LZ4)
return orc::CompressionKind::CompressionKind_LZ4;
if (method == FormatSettings::ORCCompression::ZLIB)
return orc::CompressionKind::CompressionKind_ZLIB;
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method");
}
}
ORCOutputStream::ORCOutputStream(WriteBuffer & out_) : out(out_) {}
@ -544,7 +572,7 @@ void ORCBlockOutputFormat::prepareWriter()
{
const Block & header = getPort(PortKind::Main).getHeader();
schema = orc::createStructType();
options.setCompression(orc::CompressionKind::CompressionKind_NONE);
options.setCompression(getORCCompression(format_settings.orc.output_compression_method));
size_t columns_count = header.columns();
for (size_t i = 0; i != columns_count; ++i)
schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i])));

View File

@ -14,9 +14,13 @@ namespace DB
namespace ErrorCodes
{
extern const int UNKNOWN_EXCEPTION;
extern const int NOT_IMPLEMENTED;
}
static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings)
namespace
{
parquet::ParquetVersion::type getParquetVersion(const FormatSettings & settings)
{
switch (settings.parquet.output_version)
{
@ -31,6 +35,35 @@ static parquet::ParquetVersion::type getParquetVersion(const FormatSettings & se
}
}
parquet::Compression::type getParquetCompression(FormatSettings::ParquetCompression method)
{
if (method == FormatSettings::ParquetCompression::NONE)
return parquet::Compression::type::UNCOMPRESSED;
#if USE_SNAPPY
if (method == FormatSettings::ParquetCompression::SNAPPY)
return parquet::Compression::type::SNAPPY;
#endif
#if USE_BROTLI
if (method == FormatSettings::ParquetCompression::BROTLI)
return parquet::Compression::type::BROTLI;
#endif
if (method == FormatSettings::ParquetCompression::ZSTD)
return parquet::Compression::type::ZSTD;
if (method == FormatSettings::ParquetCompression::LZ4)
return parquet::Compression::type::LZ4;
if (method == FormatSettings::ParquetCompression::GZIP)
return parquet::Compression::type::GZIP;
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method");
}
}
ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
: IOutputFormat(header_, out_), format_settings{format_settings_}
{
@ -60,9 +93,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
parquet::WriterProperties::Builder builder;
builder.version(getParquetVersion(format_settings));
#if USE_SNAPPY
builder.compression(parquet::Compression::SNAPPY);
#endif
builder.compression(getParquetCompression(format_settings.parquet.output_compression_method));
auto props = builder.build();
auto status = parquet::arrow::FileWriter::Open(
*arrow_table->schema(),

View File

@ -15,6 +15,7 @@
#include <Storages/StorageMaterializedView.h>
#include <Storages/StorageValues.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Common/Exception.h>
#include <Common/CurrentThread.h>
#include <Common/MemoryTracker.h>
#include <Common/ProfileEvents.h>
@ -710,6 +711,7 @@ IProcessor::Status FinalizingViewsTransform::prepare()
if (!output.canPush())
return Status::PortFull;
bool materialized_views_ignore_errors = views_data->context->getSettingsRef().materialized_views_ignore_errors;
size_t num_finished = 0;
size_t pos = 0;
for (auto & input : inputs)
@ -735,7 +737,7 @@ IProcessor::Status FinalizingViewsTransform::prepare()
else
statuses[i].exception = data.exception;
if (i == 0 && statuses[0].is_first)
if (i == 0 && statuses[0].is_first && !materialized_views_ignore_errors)
{
output.pushData(std::move(data));
return Status::PortFull;
@ -752,7 +754,7 @@ IProcessor::Status FinalizingViewsTransform::prepare()
if (!statuses.empty())
return Status::Ready;
if (any_exception)
if (any_exception && !materialized_views_ignore_errors)
output.pushException(any_exception);
output.finish();
@ -782,6 +784,8 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St
void FinalizingViewsTransform::work()
{
bool materialized_views_ignore_errors = views_data->context->getSettingsRef().materialized_views_ignore_errors;
size_t i = 0;
for (auto & view : views_data->views)
{
@ -794,6 +798,10 @@ void FinalizingViewsTransform::work()
any_exception = status.exception;
view.setException(addStorageToException(status.exception, view.table_id));
/// Exception will be ignored, it is saved here for the system.query_views_log
if (materialized_views_ignore_errors)
tryLogException(view.exception, &Poco::Logger::get("PushingToViews"), "Cannot push to the storage, ignoring the error");
}
else
{

View File

@ -30,13 +30,15 @@ def get_options(i, upgrade_check):
if i % 2 == 1:
join_alg_num = i // 2
if join_alg_num % 4 == 0:
if join_alg_num % 5 == 0:
client_options.append("join_algorithm='parallel_hash'")
if join_alg_num % 4 == 1:
if join_alg_num % 5 == 1:
client_options.append("join_algorithm='partial_merge'")
if join_alg_num % 4 == 2:
if join_alg_num % 5 == 2:
client_options.append("join_algorithm='full_sorting_merge'")
if join_alg_num % 4 == 3:
if join_alg_num % 5 == 3:
client_options.append("join_algorithm='grace_hash'")
if join_alg_num % 5 == 4:
client_options.append("join_algorithm='auto'")
client_options.append("max_rows_in_join=1000")

View File

@ -276,10 +276,14 @@ SELECT runningDifference(CAST( 0 AS Nullable(Int8)));
SELECT runningDifference(CAST( 0 AS Nullable(Int16)));
SELECT runningDifference(CAST( 0 AS Nullable(Int32)));
SELECT runningDifference(CAST( 0 AS Nullable(Int64)));
SELECT runningDifference(CAST( 0 AS Nullable(Int128)));
SELECT runningDifference(CAST( 0 AS Nullable(Int256)));
SELECT runningDifference(CAST( 0 AS Nullable(UInt8)));
SELECT runningDifference(CAST( 0 AS Nullable(UInt16)));
SELECT runningDifference(CAST( 0 AS Nullable(UInt32)));
SELECT runningDifference(CAST( 0 AS Nullable(UInt64)));
SELECT runningDifference(CAST( 0 AS Nullable(UInt128)));
SELECT runningDifference(CAST( 0 AS Nullable(UInt256)));
SELECT runningDifference(CAST( 0 AS Nullable(Float32)));
SELECT runningDifference(CAST( 0 AS Nullable(Float64)));
SELECT runningDifference(CAST( 0 AS Nullable(Date)));
@ -288,10 +292,14 @@ SELECT runningDifference(CAST(NULL AS Nullable(Int8)));
SELECT runningDifference(CAST(NULL AS Nullable(Int16)));
SELECT runningDifference(CAST(NULL AS Nullable(Int32)));
SELECT runningDifference(CAST(NULL AS Nullable(Int64)));
SELECT runningDifference(CAST(NULL AS Nullable(Int128)));
SELECT runningDifference(CAST(NULL AS Nullable(Int256)));
SELECT runningDifference(CAST(NULL AS Nullable(UInt8)));
SELECT runningDifference(CAST(NULL AS Nullable(UInt16)));
SELECT runningDifference(CAST(NULL AS Nullable(UInt32)));
SELECT runningDifference(CAST(NULL AS Nullable(UInt64)));
SELECT runningDifference(CAST(NULL AS Nullable(UInt128)));
SELECT runningDifference(CAST(NULL AS Nullable(UInt256)));
SELECT runningDifference(CAST(NULL AS Nullable(Float32)));
SELECT runningDifference(CAST(NULL AS Nullable(Float64)));
SELECT runningDifference(CAST(NULL AS Nullable(Date)));

View File

@ -19,6 +19,30 @@
\N
\N
2
-
0
1
4
5
170141183460469231731687303715884105717
-
0
1
4
5
170141183460469231731687303715884105718
-
0
1
4
5
170141183460469231731687303715884105717
-
0
1
4
5
170141183460469231731687303715884105718
--Date Difference--
\N
\N

View File

@ -5,6 +5,14 @@ select '-';
select runningDifference(x) from (select arrayJoin([Null, 1]) as x);
select '-';
select runningDifference(x) from (select arrayJoin([Null, Null, 1, 3, Null, Null, 5]) as x);
select '-';
select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105727]::Array(UInt128)) as x);
select '-';
select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105728]::Array(UInt256)) as x);
select '-';
select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105727]::Array(Int128)) as x);
select '-';
select runningDifference(x) from (select arrayJoin([0, 1, 5, 10, 170141183460469231731687303715884105728]::Array(Int256)) as x);
select '--Date Difference--';
select runningDifference(x) from (select arrayJoin([Null, Null, toDate('1970-1-1'), toDate('1970-12-31'), Null, Null, toDate('2010-8-9')]) as x);
select '-';

View File

@ -15,7 +15,7 @@ function create_db()
# Multiple database replicas on one server are actually not supported (until we have namespaces).
# So CREATE TABLE queries will fail on all replicas except one. But it still makes sense for a stress test.
$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 --query \
"create database if not exists ${CLICKHOUSE_DATABASE}_repl_$SUFFIX engine=Replicated('/test/01111/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '$SHARD', '$REPLICA')" \
"create database if not exists ${CLICKHOUSE_DATABASE}_repl_01111_$SUFFIX engine=Replicated('/test/01111/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '$SHARD', '$REPLICA')" \
2>&1| grep -Fa "Exception: " | grep -Fv "REPLICA_ALREADY_EXISTS" | grep -Fiv "Will not try to start it up" | \
grep -Fv "Coordination::Exception" | grep -Fv "already contains some data and it does not look like Replicated database path"
sleep 0.$RANDOM

View File

@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE orc (array1 Array(Int32), array2 Array(
$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES ([1,2,3,4,5], [[1,2], [3,4], [5]]), ([42], [[42, 42], [42]])";
$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC";
$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC SETTINGS output_format_orc_compression_method='none'" | md5sum;
$CLICKHOUSE_CLIENT --query="DROP TABLE orc";

View File

@ -12,3 +12,6 @@ in_transaction 10000
out_transaction 0
{"'implicit_True'":"implicit_True","all":"2","is_empty":0}
{"'implicit_False'":"implicit_False","all":"2","is_empty":1}
0
0
0

View File

@ -1,4 +1,4 @@
-- Tags: no-ordinary-database
-- Tags: no-ordinary-database, no-fasttest
CREATE TABLE landing (n Int64) engine=MergeTree order by n;
CREATE TABLE target (n Int64) engine=MergeTree order by n;
@ -92,3 +92,13 @@ WHERE
query LIKE '-- Verify that the transaction_id column is NOT populated without transaction%'
GROUP BY transaction_id
FORMAT JSONEachRow;
SET implicit_transaction=1;
SET throw_on_unsupported_query_inside_transaction=1;
SELECT * FROM system.one;
SELECT * FROM cluster('test_cluster_interserver_secret', system, one); -- { serverError NOT_IMPLEMENTED }
SELECT * FROM cluster('test_cluster_two_shards', system, one); -- { serverError NOT_IMPLEMENTED }
SET throw_on_unsupported_query_inside_transaction=0;
-- there's no session in the interserver mode
SELECT * FROM cluster('test_cluster_interserver_secret', system, one) FORMAT Null; -- { serverError INVALID_TRANSACTION }
SELECT * FROM cluster('test_cluster_two_shards', system, one);

View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT --query="SELECT arrayJoin([[], [1]]) FORMAT ORC SETTINGS output_format_orc_compression_method='none'" | md5sum;

View File

@ -1,3 +0,0 @@
-- Tags: no-fasttest
SELECT arrayJoin([[], [1]]) FORMAT ORC;

View File

@ -62,7 +62,6 @@ SELECT * FROM numbers(10);
SELECT * FROM numbers(10) LIMIT 3 OFFSET 2;
3
4
5
SELECT * FROM numbers(10) LIMIT 5 OFFSET 2;
3
4

View File

@ -0,0 +1,28 @@
-- { echoOn }
select * from data_02572 order by key;
insert into data_02572 settings materialized_views_ignore_errors=1 values (2);
select * from data_02572 order by key;
2
-- check system.query_views_log
system flush logs;
-- lower(status) to pass through clickhouse-test "exception" check
select lower(status::String), errorCodeToName(exception_code)
from system.query_views_log where
view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and
view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572')
order by event_date, event_time
;
exceptionwhileprocessing UNKNOWN_TABLE
-- materialized_views_ignore_errors=0
insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE }
select * from data_02572 order by key;
1
2
create table receiver_02572 as data_02572;
insert into data_02572 values (3);
select * from data_02572 order by key;
1
2
3
select * from receiver_02572 order by key;
3

View File

@ -0,0 +1,40 @@
set prefer_localhost_replica=1;
drop table if exists data_02572;
drop table if exists proxy_02572;
drop table if exists push_to_proxy_mv_02572;
drop table if exists receiver_02572;
create table data_02572 (key Int) engine=Memory();
create table proxy_02572 (key Int) engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');
-- ensure that insert fails
insert into proxy_02572 values (1); -- { serverError UNKNOWN_TABLE }
-- proxy data with MV
create materialized view push_to_proxy_mv_02572 to proxy_02572 as select * from data_02572;
-- { echoOn }
select * from data_02572 order by key;
insert into data_02572 settings materialized_views_ignore_errors=1 values (2);
select * from data_02572 order by key;
-- check system.query_views_log
system flush logs;
-- lower(status) to pass through clickhouse-test "exception" check
select lower(status::String), errorCodeToName(exception_code)
from system.query_views_log where
view_name = concatWithSeparator('.', currentDatabase(), 'push_to_proxy_mv_02572') and
view_target = concatWithSeparator('.', currentDatabase(), 'proxy_02572')
order by event_date, event_time
;
-- materialized_views_ignore_errors=0
insert into data_02572 values (1); -- { serverError UNKNOWN_TABLE }
select * from data_02572 order by key;
create table receiver_02572 as data_02572;
insert into data_02572 values (3);
select * from data_02572 order by key;
select * from receiver_02572 order by key;

View File

@ -0,0 +1,2 @@
10 querystart OK
10 queryfinish OK

View File

@ -0,0 +1,26 @@
-- Tags: no-parallel, no-replicated-database
-- Tag no-parallel: due to attaching to system.query_log
-- Tag no-replicated-database: Replicated database will have extra queries
-- Attach an MV to system.query_log and check that writing to query_log will not fail
set log_queries=1;
drop table if exists log_proxy_02572;
drop table if exists push_to_logs_proxy_mv_02572;
create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');
create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log;
select 1 format Null;
system flush logs;
system flush logs;
drop table log_proxy_02572;
drop table push_to_logs_proxy_mv_02572;
system flush logs;
-- lower() to pass through clickhouse-test "exception" check
select count(), lower(type::String), errorCodeToName(exception_code)
from system.query_log
where current_database = currentDatabase() group by 2, 3;

View File

@ -0,0 +1,14 @@
10
10
10
10
10
10
10
10
10
10
10
10
10
10

View File

@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='brotli'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='gzip'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='zlib'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format ORC settings output_format_orc_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='lz4_frame'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table"
$CLICKHOUSE_LOCAL -q "select * from numbers(10) format Arrow settings output_format_arrow_compression_method='zstd'" | $CLICKHOUSE_LOCAL --input-format=Arrow -q "select count() from table"

View File

@ -0,0 +1,63 @@
0
1
2
3
4
15
15
16
16
17
30
30
31
31
32
102
103
104
105
105
106
107
108
109
105
106
107
108
109
60
60
61
61
62
62
63
63
64
64
60
35
35
36
36
37
37
38
38
39
39
105
106
107
108
109
12
13
13
14
14
15
15
16

View File

@ -0,0 +1,34 @@
set allow_experimental_analyzer=1;
DROP TABLE IF EXISTS test;
CREATE TABLE test (i UInt64) Engine = MergeTree() order by i;
INSERT INTO test SELECT number FROM numbers(100);
INSERT INTO test SELECT number FROM numbers(10,100);
OPTIMIZE TABLE test FINAL;
-- Only set limit
SET limit = 5;
SELECT * FROM test; -- 5 rows
SELECT * FROM test OFFSET 20; -- 5 rows
SELECT * FROM (SELECT i FROM test LIMIT 10 OFFSET 50) TMP; -- 5 rows
SELECT * FROM test LIMIT 4 OFFSET 192; -- 4 rows
SELECT * FROM test LIMIT 10 OFFSET 195; -- 5 rows
-- Only set offset
SET limit = 0;
SET offset = 195;
SELECT * FROM test; -- 5 rows
SELECT * FROM test OFFSET 20; -- no result
SELECT * FROM test LIMIT 100; -- no result
SET offset = 10;
SELECT * FROM test LIMIT 20 OFFSET 100; -- 10 rows
SELECT * FROM test LIMIT 11 OFFSET 100; -- 1 rows
-- offset and limit together
SET limit = 10;
SELECT * FROM test LIMIT 50 OFFSET 50; -- 10 rows
SELECT * FROM test LIMIT 50 OFFSET 190; -- 0 rows
SELECT * FROM test LIMIT 50 OFFSET 185; -- 5 rows
SELECT * FROM test LIMIT 18 OFFSET 5; -- 8 rows
DROP TABLE test;

View File

@ -168,6 +168,10 @@ int decompress(char * input, char * output, off_t start, off_t end, size_t max_n
return 0;
}
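/// If the decompressor runs under sudo, the files it writes would otherwise end up owned by root,
/// so after decompression they are chown()'ed back to the owner of the original (compressed) binary.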
bool isSudo()
{
return geteuid() == 0;
}
/// Read data about files and decompress them.
int decompressFiles(int input_fd, char * path, char * name, bool & have_compressed_analoge, bool & has_exec, char * decompressed_suffix, uint64_t * decompressed_umask)
@ -220,6 +224,8 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress
return 1;
}
bool is_sudo = isSudo();
FileData file_info;
/// Decompress files with appropriate file names
for (size_t i = 0; i < le64toh(metadata.number_of_files); ++i)
@ -319,6 +325,9 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress
perror("fsync");
if (0 != close(output_fd))
perror("close");
if (is_sudo)
chown(file_name, info_in.st_uid, info_in.st_gid);
}
if (0 != munmap(input, info_in.st_size))
@ -414,6 +423,13 @@ int main(int/* argc*/, char* argv[])
else
name = file_path;
struct stat input_info;
if (0 != stat(self, &input_info))
{
perror("stat");
return 1;
}
#if !defined(OS_DARWIN) && !defined(OS_FREEBSD)
/// get inode of this executable
uint64_t inode = getInode(self);
@ -441,13 +457,6 @@ int main(int/* argc*/, char* argv[])
return 1;
}
struct stat input_info;
if (0 != stat(self, &input_info))
{
perror("stat");
return 1;
}
/// inconsistency in WSL1 Ubuntu - inode reported in /proc/self/maps is a 64bit to
/// 32bit conversion of input_info.st_ino
if (input_info.st_ino & 0xFFFFFFFF00000000 && !(inode & 0xFFFFFFFF00000000))
@ -532,6 +541,9 @@ int main(int/* argc*/, char* argv[])
return 1;
}
if (isSudo())
chown(static_cast<char *>(self), input_info.st_uid, input_info.st_gid);
if (has_exec)
{
#if !defined(OS_DARWIN) && !defined(OS_FREEBSD)