Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix-68618

Nikita Mikhaylov 2024-08-25 14:24:20 +02:00
commit 69bb1df642
28 changed files with 229 additions and 99 deletions

View File

@@ -54,6 +54,7 @@ Other upcoming meetups
* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
## Recent Recordings

View File

@@ -692,7 +692,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage(
result_column_node = it->second;
}
/// Check if it's a dynamic subcolumn
-else
+else if (table_expression_data.supports_subcolumns)
{
auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name);
auto jt = table_expression_data.column_name_to_column_node.find(column_name);
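The new `else if` branch only attempts dynamic-subcolumn resolution when the table expression's storage supports subcolumns, after splitting the full identifier into a column part and a subcolumn part. Below is a minimal sketch of the split-at-first-dot convention this code appears to rely on for Nested::splitName; the helper is illustrative, not ClickHouse's implementation:

```cpp
#include <iostream>
#include <string>
#include <utility>

/// Illustrative stand-in: split "json.a.b" at the first dot into the storage
/// column ("json") and the dynamic subcolumn path ("a.b").
static std::pair<std::string, std::string> splitName(const std::string & full_name)
{
    auto pos = full_name.find('.');
    if (pos == std::string::npos)
        return {full_name, {}};
    return {full_name.substr(0, pos), full_name.substr(pos + 1)};
}

int main()
{
    auto [column_name, dynamic_subcolumn_name] = splitName("json.a.b");
    std::cout << column_name << " | " << dynamic_subcolumn_name << '\n'; // json | a.b
}
```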

View File

@@ -4379,7 +4379,10 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
if (storage_snapshot->storage.supportsSubcolumns())
+{
get_column_options.withSubcolumns();
+table_expression_data.supports_subcolumns = true;
+}
auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
table_expression_data.column_names_and_types = NamesAndTypes(column_names_and_types.begin(), column_names_and_types.end());

View File

@@ -36,6 +36,7 @@ struct AnalysisTableExpressionData
std::string database_name;
std::string table_name;
bool should_qualify_columns = true;
+bool supports_subcolumns = false;
NamesAndTypes column_names_and_types;
ColumnNameToColumnNodeMap column_name_to_column_node;
std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns

View File

@@ -1181,13 +1181,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
/// Check if the number of all dynamic types exceeds the limit.
if (!canAddNewVariants(0, all_variants.size()))
{
-/// Create list of variants with their sizes and sort it.
-std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
+/// Create a list of variants with their sizes and names and then sort it.
+std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
variants_with_sizes.reserve(all_variants.size());
for (const auto & variant : all_variants)
{
-if (variant->getName() != getSharedVariantTypeName())
-variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant);
+auto variant_name = variant->getName();
+if (variant_name != getSharedVariantTypeName())
+variants_with_sizes.emplace_back(total_sizes[variant_name], variant_name, variant);
}
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
@@ -1196,14 +1197,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant.
/// Add shared variant.
result_variants.push_back(getSharedVariantDataType());
-for (const auto & [size, variant] : variants_with_sizes)
+for (const auto & [size, variant_name, variant_type] : variants_with_sizes)
{
/// Add variant to the resulting variants list until we reach max_dynamic_types.
if (canAddNewVariant(result_variants.size()))
-result_variants.push_back(variant);
+result_variants.push_back(variant_type);
/// Add all remaining variants into shared_variants_statistics until we reach its max size.
else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
-new_statistics.shared_variants_statistics[variant->getName()] = size;
+new_statistics.shared_variants_statistics[variant_name] = size;
else
break;
}
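The sort above works because std::greater() compares the (size, name, type) tuples lexicographically: variants are ordered by descending size, and the cached type name serves as a deterministic tie-breaker (it is also reused later instead of calling getName() again). A standalone sketch of that comparison, with simplified element types:

```cpp
#include <algorithm>
#include <functional>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>

int main()
{
    /// Simplified stand-ins for the (size, name, type) tuples above.
    std::vector<std::tuple<size_t, std::string>> variants_with_sizes
        = {{10, "String"}, {25, "Int64"}, {10, "Float64"}};

    /// std::greater() compares tuples lexicographically: descending by size,
    /// then descending by type name, so equal-sized variants sort deterministically.
    std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());

    for (const auto & [size, name] : variants_with_sizes)
        std::cout << size << ' ' << name << '\n';
    /// 25 Int64
    /// 10 String
    /// 10 Float64
}
```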

View File

@@ -127,7 +127,7 @@ std::string ColumnObject::getName() const
{
WriteBufferFromOwnString ss;
ss << "Object(";
-ss << "max_dynamic_paths=" << max_dynamic_paths;
+ss << "max_dynamic_paths=" << global_max_dynamic_paths;
ss << ", max_dynamic_types=" << max_dynamic_types;
std::vector<String> sorted_typed_paths;
sorted_typed_paths.reserve(typed_paths.size());
@@ -1045,9 +1045,9 @@ void ColumnObject::forEachSubcolumnRecursively(DB::IColumn::RecursiveMutableColu
bool ColumnObject::structureEquals(const IColumn & rhs) const
{
-/// 2 Object columns have equal structure if they have the same typed paths and max_dynamic_paths/max_dynamic_types.
+/// 2 Object columns have equal structure if they have the same typed paths and global_max_dynamic_paths/max_dynamic_types.
const auto * rhs_object = typeid_cast<const ColumnObject *>(&rhs);
-if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || max_dynamic_paths != rhs_object->max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types)
+if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types)
return false;
for (const auto & [path, column] : typed_paths)

View File

@@ -953,7 +953,7 @@ ColumnPtr ColumnVariant::index(const IColumn & indexes, size_t limit) const
{
/// If we have only NULLs, index will take no effect, just return resized column.
if (hasOnlyNulls())
-return cloneResized(limit);
+return cloneResized(limit == 0 ? indexes.size(): limit);
/// Optimization when we have only one non empty variant and no NULLs.
/// In this case local_discriminators column is filled with identical values and offsets column
@@ -1009,8 +1009,16 @@ ColumnPtr ColumnVariant::indexImpl(const PaddedPODArray<Type> & indexes, size_t
new_variants.reserve(num_variants);
for (size_t i = 0; i != num_variants; ++i)
{
-size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size();
-new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit));
+/// Check if no values from this variant were selected.
+if (nested_perms[i].empty())
+{
+new_variants.emplace_back(variants[i]->cloneEmpty());
+}
+else
+{
+size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size();
+new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit));
+}
}
/// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation.
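Both hunks hinge on the convention, visible in the surrounding code, that limit == 0 means "take all of indexes": the only-NULLs shortcut must therefore resize to indexes.size() rather than to the literal 0, and an empty per-variant permutation should yield an empty column instead of a permute call. A self-contained sketch of the limit convention (an illustrative function, not the IColumn API):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

/// Illustrative: select values[indexes[0..n)] where limit == 0 means "all indexes".
std::vector<int> index(const std::vector<int> & values,
                       const std::vector<size_t> & indexes, size_t limit)
{
    size_t n = limit == 0 ? indexes.size() : limit;
    std::vector<int> result;
    result.reserve(n);
    for (size_t i = 0; i < n; ++i)
        result.push_back(values[indexes[i]]);
    return result;
}

int main()
{
    std::vector<int> values = {10, 20, 30};
    std::vector<size_t> indexes = {2, 0, 1};
    assert(index(values, indexes, 0).size() == 3); /// limit 0 selects everything
    assert(index(values, indexes, 2).size() == 2); /// an explicit limit truncates
}
```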

View File

@@ -701,7 +701,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
}
-column.statistics.column_name = column.name; /// We assign column name here for better exception error message.
if (col_decl.statistics_desc)
{
if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics)

View File

@@ -706,9 +706,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
}
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-for (const auto & stats : stats_vec)
+for (const auto & [stats_column_name, stats] : stats_vec)
{
-metadata.columns.modify(stats.column_name,
+metadata.columns.modify(stats_column_name,
[&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); });
}
}
@@ -735,14 +735,14 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
{
if (!metadata.columns.has(statistics_column_name))
{
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name);
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot modify statistics for column {}: this column is not found", statistics_column_name);
}
}
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-for (const auto & stats : stats_vec)
+for (const auto & [stats_column_name, stats] : stats_vec)
{
-metadata.columns.modify(stats.column_name,
+metadata.columns.modify(stats_column_name,
[&](ColumnDescription & column) { column.statistics.assign(stats); });
}
}
@@ -867,8 +867,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
rename_visitor.visit(column_to_modify.default_desc.expression);
if (column_to_modify.ttl)
rename_visitor.visit(column_to_modify.ttl);
-if (column_to_modify.name == column_name && !column_to_modify.statistics.empty())
-column_to_modify.statistics.column_name = rename_to;
});
}
if (metadata.table_ttl.definition_ast)

View File

@@ -218,11 +218,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
settings = col_ast->settings->as<ASTSetQuery &>().changes;
if (col_ast->statistics_desc)
-{
statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type);
-/// every column has name `x` here, so we have to set the name manually.
-statistics.column_name = name;
-}
}
else
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description");

View File

@@ -75,7 +75,7 @@ static ColumnsStatistics getStatisticsForColumns(
const auto * desc = all_columns.tryGet(column.name);
if (desc && !desc->statistics.empty())
{
-auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics);
+auto statistics = MergeTreeStatisticsFactory::instance().get(*desc);
all_statistics.push_back(std::move(statistics));
}
}

View File

@@ -552,7 +552,7 @@ static std::set<ColumnStatisticsPtr> getStatisticsToRecalculate(const StorageMet
{
if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name))
{
-stats_to_recalc.insert(stats_factory.get(col_desc.statistics));
+stats_to_recalc.insert(stats_factory.get(col_desc));
}
}
return stats_to_recalc;
@@ -1557,7 +1557,7 @@ private:
if (ctx->materialized_statistics.contains(col.name))
{
-stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics));
+stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col));
}
else
{

View File

@@ -58,8 +58,8 @@ IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
{
}
-ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_)
-: stats_desc(stats_desc_)
+ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_)
+: stats_desc(stats_desc_), column_name(column_name_)
{
}
@@ -176,7 +176,7 @@ String ColumnStatistics::getFileName() const
const String & ColumnStatistics::columnName() const
{
-return stats_desc.column_name;
+return column_name;
}
UInt64 ColumnStatistics::rowCount() const
@@ -227,15 +227,15 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st
}
}
-ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const
+ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & column_desc) const
{
-ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(stats);
-for (const auto & [type, desc] : stats.types_to_desc)
+ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(column_desc.statistics, column_desc.name);
+for (const auto & [type, desc] : column_desc.statistics.types_to_desc)
{
auto it = creators.find(type);
if (it == creators.end())
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
-auto stat_ptr = (it->second)(desc, stats.data_type);
+auto stat_ptr = (it->second)(desc, column_desc.type);
column_stat->stats[type] = stat_ptr;
}
return column_stat;
@@ -246,7 +246,7 @@ ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription &
ColumnsStatistics result;
for (const auto & col : columns)
if (!col.statistics.empty())
-result.push_back(get(col.statistics));
+result.push_back(get(col));
return result;
}
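get() resolves each requested statistics type through a registry of creator functions and rejects unknown types with an exception. A minimal sketch of that creator-registry pattern; all names here are illustrative, not ClickHouse's API:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

struct Statistic { std::string type; };
using StatisticPtr = std::shared_ptr<Statistic>;
using Creator = std::function<StatisticPtr(const std::string & /*column type*/)>;

class Factory
{
public:
    void registerCreator(const std::string & type, Creator creator) { creators[type] = std::move(creator); }

    /// Mirrors the lookup in get(): unknown statistic types are rejected.
    StatisticPtr get(const std::string & type, const std::string & column_type) const
    {
        auto it = creators.find(type);
        if (it == creators.end())
            throw std::runtime_error("Unknown statistic type '" + type + "'");
        return (it->second)(column_type);
    }

private:
    std::map<std::string, Creator> creators;
};

int main()
{
    Factory factory;
    factory.registerCreator("tdigest", [](const std::string &) { return std::make_shared<Statistic>(Statistic{"tdigest"}); });
    std::cout << factory.get("tdigest", "Float64")->type << '\n';
}
```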

View File

@@ -54,7 +54,7 @@ using StatisticsPtr = std::shared_ptr<IStatistics>;
class ColumnStatistics
{
public:
-explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_);
+explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_);
void serialize(WriteBuffer & buf);
void deserialize(ReadBuffer & buf);
@@ -73,10 +73,12 @@ public:
private:
friend class MergeTreeStatisticsFactory;
ColumnStatisticsDescription stats_desc;
+String column_name;
std::map<StatisticsType, StatisticsPtr> stats;
UInt64 rows = 0; /// the number of rows in the column
};
+struct ColumnDescription;
class ColumnsDescription;
using ColumnStatisticsPtr = std::shared_ptr<ColumnStatistics>;
using ColumnsStatistics = std::vector<ColumnStatisticsPtr>;
@@ -91,7 +93,7 @@ public:
using Validator = std::function<void(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
-ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
+ColumnStatisticsPtr get(const ColumnDescription & column_desc) const;
ColumnsStatistics getMany(const ColumnsDescription & columns) const;
void registerValidator(StatisticsType type, Validator validator);

View File

@@ -6,7 +6,6 @@
#include <Parsers/ASTStatisticsDeclaration.h>
#include <Parsers/queryToString.h>
#include <Parsers/ParserCreateQuery.h>
-#include <Poco/Logger.h>
#include <Storages/ColumnsDescription.h>
@@ -97,16 +96,13 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
{
chassert(merging_column_type);
-if (column_name.empty())
-column_name = merging_column_name;
data_type = merging_column_type;
for (const auto & [stats_type, stats_desc]: other.types_to_desc)
{
if (!if_not_exists && types_to_desc.contains(stats_type))
{
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name);
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, merging_column_name);
}
else if (!types_to_desc.contains(stats_type))
types_to_desc.emplace(stats_type, stats_desc);
@@ -115,9 +111,6 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other)
{
-if (other.column_name != column_name)
-throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);
types_to_desc = other.types_to_desc;
data_type = other.data_type;
}
@@ -127,7 +120,7 @@ void ColumnStatisticsDescription::clear()
types_to_desc.clear();
}
-std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
+std::vector<std::pair<String, ColumnStatisticsDescription>> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
{
const auto * stat_definition_ast = definition_ast->as<ASTStatisticsDeclaration>();
if (!stat_definition_ast)
@@ -145,7 +138,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
statistics_types.emplace(stat.type, stat);
}
-std::vector<ColumnStatisticsDescription> result;
+std::vector<std::pair<String, ColumnStatisticsDescription>> result;
result.reserve(stat_definition_ast->columns->children.size());
for (const auto & column_ast : stat_definition_ast->columns->children)
@@ -157,10 +150,9 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name);
const auto & column = columns.getPhysical(physical_column_name);
-stats.column_name = column.name;
stats.data_type = column.type;
stats.types_to_desc = statistics_types;
-result.push_back(stats);
+result.emplace_back(physical_column_name, stats);
}
if (result.empty())
@@ -175,14 +167,13 @@ ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(c
if (stat_type_list_ast->children.empty())
throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column));
ColumnStatisticsDescription stats;
-stats.column_name = column.name;
for (const auto & ast : stat_type_list_ast->children)
{
const auto & stat_type = ast->as<const ASTFunction &>().name;
SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone());
if (stats.types_to_desc.contains(stat.type))
throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type);
throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", column.name, stat_type);
stats.types_to_desc.emplace(stat.type, std::move(stat));
}
stats.data_type = data_type;
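The net effect of these hunks is that ColumnStatisticsDescription no longer stores a column_name: fromAST returns <column name, description> pairs instead, so one description object carries no per-column state. A hypothetical sketch of that keyed-result shape, with names invented for illustration:

```cpp
#include <iostream>
#include <string>
#include <utility>
#include <vector>

/// Hypothetical stand-in for a description that, after the refactor,
/// no longer stores the column name it applies to.
struct StatsDescription { std::string types; };

/// Return <column name, description> pairs rather than embedding the
/// name inside each description, as the new fromAST signature does.
std::vector<std::pair<std::string, StatsDescription>>
describeColumns(const std::vector<std::string> & columns, const StatsDescription & desc)
{
    std::vector<std::pair<std::string, StatsDescription>> result;
    result.reserve(columns.size());
    for (const auto & name : columns)
        result.emplace_back(name, desc); /// same description, keyed per column
    return result;
}

int main()
{
    for (const auto & [name, desc] : describeColumns({"a", "b"}, {"tdigest"}))
        std::cout << name << ": " << desc.types << '\n';
}
```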

View File

@@ -55,12 +55,12 @@ struct ColumnStatisticsDescription
ASTPtr getAST() const;
-static std::vector<ColumnStatisticsDescription> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
+/// get a vector of <column name, statistics desc> pair
+static std::vector<std::pair<String, ColumnStatisticsDescription>> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
static ColumnStatisticsDescription fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type);
using StatisticsTypeDescMap = std::map<StatisticsType, SingleStatisticsDescription>;
StatisticsTypeDescMap types_to_desc;
-String column_name;
DataTypePtr data_type;
};

View File

@@ -6,11 +6,17 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance(
"node1", user_configs=["config/config.xml"], with_zookeeper=True
"node1",
user_configs=["config/config.xml"],
with_zookeeper=True,
macros={"replica": "a", "shard": "shard1"},
)
node2 = cluster.add_instance(
"node2", user_configs=["config/config.xml"], with_zookeeper=True
"node2",
user_configs=["config/config.xml"],
with_zookeeper=True,
macros={"replica": "b", "shard": "shard1"},
)
@@ -129,8 +135,8 @@ def test_single_node_normal(started_cluster):
def test_replicated_table_ddl(started_cluster):
node1.query("DROP TABLE IF EXISTS test_stat")
node2.query("DROP TABLE IF EXISTS test_stat")
node1.query("DROP TABLE IF EXISTS test_stat SYNC")
node2.query("DROP TABLE IF EXISTS test_stat SYNC")
node1.query(
"""
@@ -183,3 +189,19 @@
)
check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "a", True)
check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "b", True)
+def test_replicated_db(started_cluster):
+node1.query("DROP DATABASE IF EXISTS test SYNC")
+node2.query("DROP DATABASE IF EXISTS test SYNC")
+node1.query(
+"CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')"
+)
+node2.query(
+"CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')"
+)
+node1.query(
+"CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()"
+)
+node2.query("ALTER TABLE test.test_stats MODIFY COLUMN b Float64")
+node2.query("ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest")

View File

@@ -1635,21 +1635,21 @@ QUERY id: 0
JOIN TREE
TABLE id: 10, alias: __table1, table_name: default.test_table
SELECT sum(float64 + 2) From test_table;
-26.5
+26.875
SELECT sum(2 + float64) From test_table;
-26.5
+26.875
SELECT sum(float64 - 2) From test_table;
-6.5
+6.875
SELECT sum(2 - float64) From test_table;
--6.5
+-6.875
SELECT sum(float64) + 2 * count(float64) From test_table;
-26.5
+26.875
SELECT 2 * count(float64) + sum(float64) From test_table;
-26.5
+26.875
SELECT sum(float64) - 2 * count(float64) From test_table;
-6.5
+6.875
SELECT 2 * count(float64) - sum(float64) From test_table;
--6.5
+-6.875
EXPLAIN QUERY TREE (SELECT sum(float64 + 2) From test_table);
QUERY id: 0
PROJECTION COLUMNS
@@ -2463,25 +2463,25 @@ QUERY id: 0
JOIN TREE
TABLE id: 12, alias: __table1, table_name: default.test_table
SELECT sum(float64 + 2) + sum(float64 + 3) From test_table;
-58
+58.75
SELECT sum(float64 + 2) - sum(float64 + 3) From test_table;
-5
SELECT sum(float64 - 2) + sum(float64 - 3) From test_table;
-8
+8.75
SELECT sum(float64 - 2) - sum(float64 - 3) From test_table;
5
SELECT sum(2 - float64) - sum(3 - float64) From test_table;
-5
SELECT (sum(float64) + 2 * count(float64)) + (sum(float64) + 3 * count(float64)) From test_table;
-58
+58.75
SELECT (sum(float64) + 2 * count(float64)) - (sum(float64) + 3 * count(float64)) From test_table;
-5
SELECT (sum(float64) - 2 * count(float64)) + (sum(float64) - 3 * count(float64)) From test_table;
-8
+8.75
SELECT (sum(float64) - 2 * count(float64)) - (sum(float64) - 3 * count(float64)) From test_table;
5
SELECT (2 * count(float64) - sum(float64)) + (3 * count(float64) - sum(float64)) From test_table;
--8
+-8.75
EXPLAIN QUERY TREE (SELECT sum(float64 + 2) + sum(float64 + 3) From test_table);
QUERY id: 0
PROJECTION COLUMNS

View File

@@ -25,11 +25,12 @@ CREATE TABLE test_table
decimal32 Decimal32(5),
) ENGINE=MergeTree ORDER BY uint64;
-INSERT INTO test_table VALUES (1, 1.1, 1.11);
-INSERT INTO test_table VALUES (2, 2.2, 2.22);
-INSERT INTO test_table VALUES (3, 3.3, 3.33);
-INSERT INTO test_table VALUES (4, 4.4, 4.44);
-INSERT INTO test_table VALUES (5, 5.5, 5.55);
+-- Use Float64 numbers divisible by 1/16 (or some other small power of two), so that their sum doesn't depend on summation order.
+INSERT INTO test_table VALUES (1, 1.125, 1.11);
+INSERT INTO test_table VALUES (2, 2.250, 2.22);
+INSERT INTO test_table VALUES (3, 3.375, 3.33);
+INSERT INTO test_table VALUES (4, 4.500, 4.44);
+INSERT INTO test_table VALUES (5, 5.625, 5.55);
-- { echoOn }
SELECT sum(uint64 + 1 AS i) from test_table where i > 0;
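The comment's reasoning can be checked directly: 1.1, 2.2, ... have no exact binary64 representation, so the rounded total can depend on the order of additions, while multiples of 1/16 (1.125 = 18/16, 2.250 = 36/16, ...) and all their partial sums are exact, summing to exactly 16.875 in any order; that is why the reference output above settles on stable values such as 26.875 for sum(float64 + 2). A small standalone check (written in C++ for illustration, although the test itself is SQL):

```cpp
#include <cstdio>

int main()
{
    /// Inexact decimals: the two association orders may round differently.
    double a = (((1.1 + 2.2) + 3.3) + 4.4) + 5.5;
    double b = 1.1 + (2.2 + (3.3 + (4.4 + 5.5)));
    std::printf("%.17g vs %.17g\n", a, b);

    /// Exact multiples of 1/16: every partial sum is exact, so any order
    /// yields exactly 16.875 (and 16.875 + 5 * 2 = 26.875 for sum(float64 + 2)).
    double c = (((1.125 + 2.25) + 3.375) + 4.5) + 5.625;
    double d = 1.125 + (2.25 + (3.375 + (4.5 + 5.625)));
    std::printf("%.17g vs %.17g\n", c, d);
}
```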

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Tags: no-random-settings, no-random-merge-tree-settings
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --query "
DROP TABLE IF EXISTS t_mutate_skip_part;
CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64)
ENGINE = MergeTree ORDER BY id PARTITION BY key
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000);
INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000);
SET mutations_sync = 2;
ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1;
ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0;
"
# Mutation query may return before the entry is added to part log.
# So, we may have to retry the flush of logs until all entries are actually flushed.
for _ in {1..10}; do
${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS"
res=$(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.part_log WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart'")
if [[ $res -eq 4 ]]; then
break
fi
sleep 2.0
done
${CLICKHOUSE_CLIENT} --query "
SYSTEM FLUSH LOGS;
-- If part is skipped in mutation and hardlinked then read_rows must be 0.
SELECT part_name, read_rows
FROM system.part_log
WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart'
ORDER BY part_name;
DROP TABLE IF EXISTS t_mutate_skip_part;
"

View File

@@ -1,21 +0,0 @@
DROP TABLE IF EXISTS t_mutate_skip_part;
CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) ENGINE = MergeTree ORDER BY id PARTITION BY key;
INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000);
INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000);
SET mutations_sync = 2;
ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1;
ALTER TABLE t_mutate_skip_part DELETE WHERE key = 2 AND v2 % 10 = 0;
SYSTEM FLUSH LOGS;
-- If part is skipped in mutation and hardlinked then read_rows must be 0.
SELECT part_name, read_rows
FROM system.part_log
WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart'
ORDER BY part_name;
DROP TABLE IF EXISTS t_mutate_skip_part;

View File

@@ -0,0 +1,4 @@
str
42
42
42

View File

@@ -0,0 +1,9 @@
set allow_experimental_dynamic_type=1;
set allow_experimental_json_type=1;
set allow_experimental_analyzer=1;
select d.String from (select 'str'::Dynamic as d);
select json.a from (select '{"a" : 42}'::JSON as json);
select json.a from (select '{"a" : 42}'::JSON(a UInt32) as json);
select json.a.:Int64 from (select materialize('{"a" : 42}')::JSON as json);

View File

@@ -0,0 +1,8 @@
2 {"foo2":"bar"} 1
3 {"foo2":"bar"} 1
2 {"foo2":"baz"} 2
3 {"foo2":"bar"} 1
2 {"foo2":"bar"} 1
3 {"foo2":"bar"} 1
2 {"foo2":"baz"} 2
3 {"foo2":"bar"} 1

View File

@@ -0,0 +1,33 @@
SET allow_experimental_json_type = 1;
DROP TABLE IF EXISTS test_new_json_type;
CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id;
INSERT INTO test_new_json_type format JSONEachRow
{"id":1,"data":{"foo1":"bar"},"version":1}
{"id":2,"data":{"foo2":"bar"},"version":1}
{"id":3,"data":{"foo2":"bar"},"version":1}
;
SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id;
INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2;
SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id;
DROP TABLE test_new_json_type;
CREATE TABLE test_new_json_type(id Nullable(UInt32), data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id settings allow_nullable_key=1;
INSERT INTO test_new_json_type format JSONEachRow
{"id":1,"data":{"foo1":"bar"},"version":1}
{"id":2,"data":{"foo2":"bar"},"version":1}
{"id":3,"data":{"foo2":"bar"},"version":1}
;
SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id;
INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2;
SELECT * FROM test_new_json_type FINAL PREWHERE data.foo2 IS NOT NULL WHERE data.foo2 IS NOT NULL ORDER BY id ASC NULLS FIRST;
DROP TABLE test_new_json_type;

View File

@@ -1,3 +1,6 @@
+-- There is a bug in old analyzer with currentDatabase() and distributed query.
+SET enable_analyzer = 1;
DROP TABLE IF EXISTS t_local_1;
DROP TABLE IF EXISTS t_local_2;
DROP TABLE IF EXISTS t_merge;
@@ -10,7 +13,7 @@ INSERT INTO t_local_1 VALUES (1);
INSERT INTO t_local_2 VALUES (2);
CREATE TABLE t_merge AS t_local_1 ENGINE = Merge(currentDatabase(), '^(t_local_1|t_local_2)$');
-CREATE TABLE t_distr AS t_local_1 engine=Distributed('test_shard_localhost', currentDatabase(), t_merge, rand());
+CREATE TABLE t_distr AS t_local_1 ENGINE = Distributed('test_shard_localhost', currentDatabase(), t_merge, rand());
SELECT a, _table FROM t_merge ORDER BY a;
SELECT a, _table FROM t_distr ORDER BY a;

View File

@@ -0,0 +1,3 @@
{"foo1":"bar"} {"foo1":"bar"}
{"foo2":"bar"} {"foo2":"bar"}
{"foo2":"bar"} {"foo2":"bar"}

View File

@@ -0,0 +1,22 @@
SET allow_experimental_json_type=1;
DROP TABLE IF EXISTS test_new_json_type;
CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id;
INSERT INTO test_new_json_type format JSONEachRow
{"id":1,"data":{"foo1":"bar"},"version":1}
{"id":2,"data":{"foo2":"bar"},"version":1}
{"id":3,"data":{"foo2":"bar"},"version":1}
;
SELECT
a.data
, b.data
FROM
test_new_json_type a
JOIN test_new_json_type b
ON a.id = b.id;
DROP TABLE test_new_json_type;