mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #68820 from hanfei1991/hanfei/fix-logical-err-stats
Fix logical error in MODIFY STATISTICS
This commit is contained in:
commit
b578d1af1c
@ -701,7 +701,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
|
||||
col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
|
||||
}
|
||||
|
||||
column.statistics.column_name = column.name; /// We assign column name here for better exception error message.
|
||||
if (col_decl.statistics_desc)
|
||||
{
|
||||
if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics)
|
||||
|
@ -706,9 +706,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
}
|
||||
|
||||
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
|
||||
for (const auto & stats : stats_vec)
|
||||
for (const auto & [stats_column_name, stats] : stats_vec)
|
||||
{
|
||||
metadata.columns.modify(stats.column_name,
|
||||
metadata.columns.modify(stats_column_name,
|
||||
[&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); });
|
||||
}
|
||||
}
|
||||
@ -735,14 +735,14 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
{
|
||||
if (!metadata.columns.has(statistics_column_name))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name);
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot modify statistics for column {}: this column is not found", statistics_column_name);
|
||||
}
|
||||
}
|
||||
|
||||
auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
|
||||
for (const auto & stats : stats_vec)
|
||||
for (const auto & [stats_column_name, stats] : stats_vec)
|
||||
{
|
||||
metadata.columns.modify(stats.column_name,
|
||||
metadata.columns.modify(stats_column_name,
|
||||
[&](ColumnDescription & column) { column.statistics.assign(stats); });
|
||||
}
|
||||
}
|
||||
@ -867,8 +867,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
rename_visitor.visit(column_to_modify.default_desc.expression);
|
||||
if (column_to_modify.ttl)
|
||||
rename_visitor.visit(column_to_modify.ttl);
|
||||
if (column_to_modify.name == column_name && !column_to_modify.statistics.empty())
|
||||
column_to_modify.statistics.column_name = rename_to;
|
||||
});
|
||||
}
|
||||
if (metadata.table_ttl.definition_ast)
|
||||
|
@ -218,11 +218,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
|
||||
settings = col_ast->settings->as<ASTSetQuery &>().changes;
|
||||
|
||||
if (col_ast->statistics_desc)
|
||||
{
|
||||
statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type);
|
||||
/// every column has name `x` here, so we have to set the name manually.
|
||||
statistics.column_name = name;
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description");
|
||||
|
@ -75,7 +75,7 @@ static ColumnsStatistics getStatisticsForColumns(
|
||||
const auto * desc = all_columns.tryGet(column.name);
|
||||
if (desc && !desc->statistics.empty())
|
||||
{
|
||||
auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics);
|
||||
auto statistics = MergeTreeStatisticsFactory::instance().get(*desc);
|
||||
all_statistics.push_back(std::move(statistics));
|
||||
}
|
||||
}
|
||||
|
@ -552,7 +552,7 @@ static std::set<ColumnStatisticsPtr> getStatisticsToRecalculate(const StorageMet
|
||||
{
|
||||
if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name))
|
||||
{
|
||||
stats_to_recalc.insert(stats_factory.get(col_desc.statistics));
|
||||
stats_to_recalc.insert(stats_factory.get(col_desc));
|
||||
}
|
||||
}
|
||||
return stats_to_recalc;
|
||||
@ -1557,7 +1557,7 @@ private:
|
||||
|
||||
if (ctx->materialized_statistics.contains(col.name))
|
||||
{
|
||||
stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics));
|
||||
stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -58,8 +58,8 @@ IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
|
||||
{
|
||||
}
|
||||
|
||||
ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_)
|
||||
: stats_desc(stats_desc_)
|
||||
ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_)
|
||||
: stats_desc(stats_desc_), column_name(column_name_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -176,7 +176,7 @@ String ColumnStatistics::getFileName() const
|
||||
|
||||
const String & ColumnStatistics::columnName() const
|
||||
{
|
||||
return stats_desc.column_name;
|
||||
return column_name;
|
||||
}
|
||||
|
||||
UInt64 ColumnStatistics::rowCount() const
|
||||
@ -227,15 +227,15 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st
|
||||
}
|
||||
}
|
||||
|
||||
ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const
|
||||
ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & column_desc) const
|
||||
{
|
||||
ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(stats);
|
||||
for (const auto & [type, desc] : stats.types_to_desc)
|
||||
ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(column_desc.statistics, column_desc.name);
|
||||
for (const auto & [type, desc] : column_desc.statistics.types_to_desc)
|
||||
{
|
||||
auto it = creators.find(type);
|
||||
if (it == creators.end())
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
|
||||
auto stat_ptr = (it->second)(desc, stats.data_type);
|
||||
auto stat_ptr = (it->second)(desc, column_desc.type);
|
||||
column_stat->stats[type] = stat_ptr;
|
||||
}
|
||||
return column_stat;
|
||||
@ -246,7 +246,7 @@ ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription &
|
||||
ColumnsStatistics result;
|
||||
for (const auto & col : columns)
|
||||
if (!col.statistics.empty())
|
||||
result.push_back(get(col.statistics));
|
||||
result.push_back(get(col));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -54,7 +54,7 @@ using StatisticsPtr = std::shared_ptr<IStatistics>;
|
||||
class ColumnStatistics
|
||||
{
|
||||
public:
|
||||
explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_);
|
||||
explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_);
|
||||
|
||||
void serialize(WriteBuffer & buf);
|
||||
void deserialize(ReadBuffer & buf);
|
||||
@ -73,10 +73,12 @@ public:
|
||||
private:
|
||||
friend class MergeTreeStatisticsFactory;
|
||||
ColumnStatisticsDescription stats_desc;
|
||||
String column_name;
|
||||
std::map<StatisticsType, StatisticsPtr> stats;
|
||||
UInt64 rows = 0; /// the number of rows in the column
|
||||
};
|
||||
|
||||
struct ColumnDescription;
|
||||
class ColumnsDescription;
|
||||
using ColumnStatisticsPtr = std::shared_ptr<ColumnStatistics>;
|
||||
using ColumnsStatistics = std::vector<ColumnStatisticsPtr>;
|
||||
@ -91,7 +93,7 @@ public:
|
||||
using Validator = std::function<void(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
|
||||
using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
|
||||
|
||||
ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
|
||||
ColumnStatisticsPtr get(const ColumnDescription & column_desc) const;
|
||||
ColumnsStatistics getMany(const ColumnsDescription & columns) const;
|
||||
|
||||
void registerValidator(StatisticsType type, Validator validator);
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Parsers/ASTStatisticsDeclaration.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
|
||||
|
||||
@ -97,16 +96,13 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
|
||||
{
|
||||
chassert(merging_column_type);
|
||||
|
||||
if (column_name.empty())
|
||||
column_name = merging_column_name;
|
||||
|
||||
data_type = merging_column_type;
|
||||
|
||||
for (const auto & [stats_type, stats_desc]: other.types_to_desc)
|
||||
{
|
||||
if (!if_not_exists && types_to_desc.contains(stats_type))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name);
|
||||
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, merging_column_name);
|
||||
}
|
||||
else if (!types_to_desc.contains(stats_type))
|
||||
types_to_desc.emplace(stats_type, stats_desc);
|
||||
@ -115,9 +111,6 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
|
||||
|
||||
void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other)
|
||||
{
|
||||
if (other.column_name != column_name)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);
|
||||
|
||||
types_to_desc = other.types_to_desc;
|
||||
data_type = other.data_type;
|
||||
}
|
||||
@ -127,7 +120,7 @@ void ColumnStatisticsDescription::clear()
|
||||
types_to_desc.clear();
|
||||
}
|
||||
|
||||
std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
|
||||
std::vector<std::pair<String, ColumnStatisticsDescription>> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
|
||||
{
|
||||
const auto * stat_definition_ast = definition_ast->as<ASTStatisticsDeclaration>();
|
||||
if (!stat_definition_ast)
|
||||
@ -145,7 +138,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
|
||||
statistics_types.emplace(stat.type, stat);
|
||||
}
|
||||
|
||||
std::vector<ColumnStatisticsDescription> result;
|
||||
std::vector<std::pair<String, ColumnStatisticsDescription>> result;
|
||||
result.reserve(stat_definition_ast->columns->children.size());
|
||||
|
||||
for (const auto & column_ast : stat_definition_ast->columns->children)
|
||||
@ -157,10 +150,9 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name);
|
||||
|
||||
const auto & column = columns.getPhysical(physical_column_name);
|
||||
stats.column_name = column.name;
|
||||
stats.data_type = column.type;
|
||||
stats.types_to_desc = statistics_types;
|
||||
result.push_back(stats);
|
||||
result.emplace_back(physical_column_name, stats);
|
||||
}
|
||||
|
||||
if (result.empty())
|
||||
@ -175,14 +167,13 @@ ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(c
|
||||
if (stat_type_list_ast->children.empty())
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column));
|
||||
ColumnStatisticsDescription stats;
|
||||
stats.column_name = column.name;
|
||||
for (const auto & ast : stat_type_list_ast->children)
|
||||
{
|
||||
const auto & stat_type = ast->as<const ASTFunction &>().name;
|
||||
|
||||
SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone());
|
||||
if (stats.types_to_desc.contains(stat.type))
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type);
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", column.name, stat_type);
|
||||
stats.types_to_desc.emplace(stat.type, std::move(stat));
|
||||
}
|
||||
stats.data_type = data_type;
|
||||
|
@ -55,12 +55,12 @@ struct ColumnStatisticsDescription
|
||||
|
||||
ASTPtr getAST() const;
|
||||
|
||||
static std::vector<ColumnStatisticsDescription> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
|
||||
/// Returns a vector of <column name, statistics description> pairs, one for each column listed in the statistics declaration.
|
||||
static std::vector<std::pair<String, ColumnStatisticsDescription>> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
|
||||
static ColumnStatisticsDescription fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type);
|
||||
|
||||
using StatisticsTypeDescMap = std::map<StatisticsType, SingleStatisticsDescription>;
|
||||
StatisticsTypeDescMap types_to_desc;
|
||||
String column_name;
|
||||
DataTypePtr data_type;
|
||||
};
|
||||
|
||||
|
@ -6,11 +6,17 @@ from helpers.cluster import ClickHouseCluster
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
|
||||
node1 = cluster.add_instance(
|
||||
"node1", user_configs=["config/config.xml"], with_zookeeper=True
|
||||
"node1",
|
||||
user_configs=["config/config.xml"],
|
||||
with_zookeeper=True,
|
||||
macros={"replica": "a", "shard": "shard1"},
|
||||
)
|
||||
|
||||
node2 = cluster.add_instance(
|
||||
"node2", user_configs=["config/config.xml"], with_zookeeper=True
|
||||
"node2",
|
||||
user_configs=["config/config.xml"],
|
||||
with_zookeeper=True,
|
||||
macros={"replica": "b", "shard": "shard1"},
|
||||
)
|
||||
|
||||
|
||||
@ -129,8 +135,8 @@ def test_single_node_normal(started_cluster):
|
||||
|
||||
|
||||
def test_replicated_table_ddl(started_cluster):
|
||||
node1.query("DROP TABLE IF EXISTS test_stat")
|
||||
node2.query("DROP TABLE IF EXISTS test_stat")
|
||||
node1.query("DROP TABLE IF EXISTS test_stat SYNC")
|
||||
node2.query("DROP TABLE IF EXISTS test_stat SYNC")
|
||||
|
||||
node1.query(
|
||||
"""
|
||||
@ -183,3 +189,19 @@ def test_replicated_table_ddl(started_cluster):
|
||||
)
|
||||
check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "a", True)
|
||||
check_stat_file_on_disk(node2, "test_stat", "all_0_0_0_3", "b", True)
|
||||
|
||||
|
||||
def test_replicated_db(started_cluster):
    """Exercise MODIFY STATISTICS on a table that lives in a Replicated database.

    Both nodes attach to the same Replicated database, node1 creates the table,
    and node2 first changes the type of column `b` and then modifies its
    statistics. There are no explicit assertions here; presumably the test
    fails only if one of the queries raises.
    """
    replicas = (node1, node2)

    # Drop any leftover database on every replica before starting.
    for replica in replicas:
        replica.query("DROP DATABASE IF EXISTS test SYNC")

    # The same statement is sent to each node; the {shard}/{replica}
    # placeholders are passed through verbatim in the query text.
    create_database = "CREATE DATABASE test ENGINE = Replicated('/test/shared_stats', '{shard}', '{replica}')"
    for replica in replicas:
        replica.query(create_database)

    node1.query(
        "CREATE TABLE test.test_stats (a Int64, b Int64) ENGINE = ReplicatedMergeTree() ORDER BY()"
    )

    # Issue the ALTERs from the other replica, in this exact order:
    # the column type change first, then the statistics modification.
    alter_statements = (
        "ALTER TABLE test.test_stats MODIFY COLUMN b Float64",
        "ALTER TABLE test.test_stats MODIFY STATISTICS b TYPE tdigest",
    )
    for statement in alter_statements:
        node2.query(statement)
|
Loading…
Reference in New Issue
Block a user