mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
More correct version
This commit is contained in:
parent
d18609467b
commit
c11ad44aad
@ -38,11 +38,33 @@ namespace ErrorCodes
|
||||
|
||||
String DataTypeAggregateFunction::doGetName() const
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "doGetName stack trace:{}", StackTrace().toString());
|
||||
return getNameImpl(true);
|
||||
}
|
||||
|
||||
|
||||
String DataTypeAggregateFunction::getNameWithoutVersion() const
|
||||
{
|
||||
return getNameImpl(false);
|
||||
}
|
||||
|
||||
|
||||
size_t DataTypeAggregateFunction::getVersion() const
|
||||
{
|
||||
if (version)
|
||||
return *version;
|
||||
return function->getDefaultVersion();
|
||||
}
|
||||
|
||||
|
||||
String DataTypeAggregateFunction::getNameImpl(bool with_version) const
|
||||
{
|
||||
WriteBufferFromOwnString stream;
|
||||
stream << "AggregateFunction(";
|
||||
if (version)
|
||||
stream << version << ", ";
|
||||
|
||||
/// If aggregate function does not support versioning its version is 0 and is not printed.
|
||||
auto data_type_version = getVersion();
|
||||
if (with_version && data_type_version)
|
||||
stream << data_type_version << ", ";
|
||||
stream << function->getName();
|
||||
|
||||
if (!parameters.empty())
|
||||
@ -64,12 +86,16 @@ String DataTypeAggregateFunction::doGetName() const
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
|
||||
MutableColumnPtr DataTypeAggregateFunction::createColumn() const
|
||||
{
|
||||
/// FIXME: ColumnAggregateFunction also uses function->serialize methods
|
||||
/// FIXME: There are a lot of function->serialize inside ColumnAggregateFunction.
|
||||
/// Looks like it also needs version.
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "KSSENII COLUMN");
|
||||
return ColumnAggregateFunction::create(function);
|
||||
}
|
||||
|
||||
|
||||
/// Create empty state
|
||||
Field DataTypeAggregateFunction::getDefault() const
|
||||
{
|
||||
@ -100,20 +126,18 @@ Field DataTypeAggregateFunction::getDefault() const
|
||||
|
||||
bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
|
||||
{
|
||||
return typeid(rhs) == typeid(*this) && getName() == rhs.getName();
|
||||
return typeid(rhs) == typeid(*this) && getNameWithoutVersion() == typeid_cast<const DataTypeAggregateFunction &>(rhs).getNameWithoutVersion();
|
||||
}
|
||||
|
||||
|
||||
SerializationPtr DataTypeAggregateFunction::doGetDefaultSerialization() const
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "get serializaton version: {}, name: {}, stack: {}", version, getName(), StackTrace().toString());
|
||||
return std::make_shared<SerializationAggregateFunction>(function, version);
|
||||
return std::make_shared<SerializationAggregateFunction>(function, getVersion());
|
||||
}
|
||||
|
||||
|
||||
static DataTypePtr create(const ASTPtr & arguments)
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "create data type: {}", StackTrace().toString());
|
||||
|
||||
String function_name;
|
||||
AggregateFunctionPtr function;
|
||||
DataTypes argument_types;
|
||||
@ -189,6 +213,7 @@ static DataTypePtr create(const ASTPtr & arguments)
|
||||
return std::make_shared<DataTypeAggregateFunction>(function, argument_types, params_row, version);
|
||||
}
|
||||
|
||||
|
||||
void registerDataTypeAggregateFunction(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerDataType("AggregateFunction", create);
|
||||
|
@ -11,6 +11,11 @@ namespace DB
|
||||
|
||||
/** Type - the state of the aggregate function.
|
||||
* Type parameters is an aggregate function, the types of its arguments, and its parameters (for parametric aggregate functions).
|
||||
*
|
||||
* Data type can support versioning for serialization of aggregate function state.
|
||||
* Version 0 also means no versioning. When a table with versioned data type is attached, its version is parsed from AST. If
|
||||
* there is no version in AST, then it is either attach with no version in metadata (then version is 0) or it
|
||||
* is a new data type (then version is default). In distributed queries version of data type is known from data type name.
|
||||
*/
|
||||
class DataTypeAggregateFunction final : public IDataType
|
||||
{
|
||||
@ -18,7 +23,9 @@ private:
|
||||
AggregateFunctionPtr function;
|
||||
DataTypes argument_types;
|
||||
Array parameters;
|
||||
mutable size_t version;
|
||||
mutable std::optional<size_t> version;
|
||||
|
||||
String getNameImpl(bool with_version) const;
|
||||
|
||||
public:
|
||||
static constexpr bool is_parametric = true;
|
||||
@ -28,7 +35,7 @@ public:
|
||||
: function(function_)
|
||||
, argument_types(argument_types_)
|
||||
, parameters(parameters_)
|
||||
, version(version_ ? *version_ : function_->getDefaultVersion())
|
||||
, version(version_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -36,6 +43,7 @@ public:
|
||||
AggregateFunctionPtr getFunction() const { return function; }
|
||||
|
||||
String doGetName() const override;
|
||||
String getNameWithoutVersion() const;
|
||||
const char * getFamilyName() const override { return "AggregateFunction"; }
|
||||
TypeIndex getTypeId() const override { return TypeIndex::AggregateFunction; }
|
||||
|
||||
@ -57,8 +65,18 @@ public:
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
|
||||
bool isVersioned() const { return function->isVersioned(); }
|
||||
void setVersion(size_t version_) const { version = version_; }
|
||||
/// Version of aggregate function state serialization.
|
||||
size_t getVersion() const;
|
||||
|
||||
/// Version is not empty only if it was parsed from AST.
|
||||
/// It is ok to have an empty version value here - then for serialization
|
||||
/// a default (latest) version is used. This method is used to force some
|
||||
/// zero version to be used instead of default - if there was no version in AST.
|
||||
void setVersionIfEmpty(size_t version_) const
|
||||
{
|
||||
if (!version)
|
||||
version = version_;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -389,7 +389,7 @@ std::pair<time_t, time_t> IMergeTreeDataPart::getMinMaxTime() const
|
||||
}
|
||||
|
||||
|
||||
void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns)
|
||||
void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns, bool loaded_from_disk)
|
||||
{
|
||||
columns = new_columns;
|
||||
column_name_to_position.clear();
|
||||
@ -398,8 +398,15 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns)
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
column_name_to_position.emplace(column.name, pos);
|
||||
|
||||
/// TODO: May be there is a better way or a better place for that.
|
||||
const auto * aggregate_function_data_type = typeid_cast<const DataTypeAggregateFunction *>(column.type.get());
|
||||
if (loaded_from_disk && aggregate_function_data_type)
|
||||
aggregate_function_data_type->setVersionIfEmpty(0);
|
||||
|
||||
for (const auto & subcolumn : column.type->getSubcolumnNames())
|
||||
column_name_to_position.emplace(Nested::concatenateName(column.name, subcolumn), pos);
|
||||
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
@ -1007,6 +1014,7 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
||||
metadata_snapshot = metadata_snapshot->projections.get(name).metadata;
|
||||
NamesAndTypesList loaded_columns;
|
||||
|
||||
bool loaded_from_disk = false;
|
||||
if (!volume->getDisk()->exists(path))
|
||||
{
|
||||
/// We can get list of columns only from columns.txt in compact parts.
|
||||
@ -1027,18 +1035,15 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
||||
loaded_columns.writeText(*buf);
|
||||
}
|
||||
volume->getDisk()->moveFile(path + ".tmp", path);
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "Loaded from metadata");
|
||||
}
|
||||
else
|
||||
{
|
||||
//LOG_TRACE(&Poco::Logger::get("kssenii"), "Loading columns stacktrace: {}", col.name, col.type->getName());
|
||||
loaded_columns.readText(*volume->getDisk()->readFile(path));
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "Loaded from disk");
|
||||
loaded_from_disk = true;
|
||||
|
||||
for (auto & col : loaded_columns)
|
||||
{
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "Setting version for columns: {}, {}", col.name, col.type->getName());
|
||||
if (auto agg = typeid_cast<const DataTypeAggregateFunction *>(col.type.get()))
|
||||
agg->setVersion(0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1047,7 +1052,7 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
||||
LOG_TRACE(&Poco::Logger::get("kssenii"), "Loaded columns: {}, {}", col.name, col.type->getName());
|
||||
}
|
||||
|
||||
setColumns(loaded_columns);
|
||||
setColumns(loaded_columns, loaded_from_disk);
|
||||
}
|
||||
|
||||
bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & storage_policy) const
|
||||
|
@ -121,7 +121,7 @@ public:
|
||||
|
||||
String getTypeName() const { return getType().toString(); }
|
||||
|
||||
void setColumns(const NamesAndTypesList & new_columns);
|
||||
void setColumns(const NamesAndTypesList & new_columns, bool loaded_from_disk = false);
|
||||
|
||||
const NamesAndTypesList & getColumns() const { return columns; }
|
||||
|
||||
|
@ -6,7 +6,7 @@ from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
|
||||
|
||||
node1 = cluster.add_instance('node1', main_configs=["configs/log_conf.xml"])
|
||||
node1 = cluster.add_instance('node1', main_configs=["configs/log_conf.xml"], stay_alive=True)
|
||||
node2 = cluster.add_instance('node2', main_configs=["configs/log_conf.xml"],
|
||||
image='yandex/clickhouse-server',
|
||||
tag='21.5', with_installed_binary=True, stay_alive=True)
|
||||
@ -14,6 +14,12 @@ node2 = cluster.add_instance('node2', main_configs=["configs/log_conf.xml"],
|
||||
node3 = cluster.add_instance('node3', with_zookeeper=True, image='yandex/clickhouse-server', tag='21.2', with_installed_binary=True, stay_alive=True)
|
||||
|
||||
|
||||
def insert_data(node):
|
||||
node.query(""" INSERT INTO test_table
|
||||
SELECT toDateTime('2020-10-01 19:20:30'), 1,
|
||||
sumMapState(arrayMap(i -> 1, range(300)), arrayMap(i -> 1, range(300)));""")
|
||||
|
||||
|
||||
def create_and_fill_table(node):
|
||||
node.query("DROP TABLE IF EXISTS test_table;")
|
||||
node.query("""
|
||||
@ -21,13 +27,10 @@ def create_and_fill_table(node):
|
||||
(
|
||||
`col1` DateTime,
|
||||
`col2` Int64,
|
||||
`col3` AggregateFunction(sumMap, Tuple(Array(UInt8), Array(UInt8)))
|
||||
`col3` AggregateFunction(sumMap, Array(UInt8), Array(UInt8))
|
||||
)
|
||||
ENGINE = AggregatingMergeTree() ORDER BY (col1, col2) """)
|
||||
|
||||
node.query(""" INSERT INTO test_table
|
||||
SELECT toDateTime('2020-10-01 19:20:30'), 1,
|
||||
sumMapState((arrayMap(i -> 1, range(300)), arrayMap(i -> 1, range(300))));""")
|
||||
insert_data(node)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
@ -92,14 +95,52 @@ def test_aggregate_function_versioning_server_upgrade(start_cluster):
|
||||
for node in [node1, node2]:
|
||||
create_and_fill_table(node)
|
||||
|
||||
expected = "([1],[300])"
|
||||
|
||||
new_server_data = node1.query("select finalizeAggregation(col3) from default.test_table;").strip()
|
||||
assert(new_server_data == "([1],[300])")
|
||||
old_server_data = node2.query("select finalizeAggregation(col3) from default.test_table;").strip()
|
||||
assert(old_server_data != new_server_data)
|
||||
assert(old_server_data == "([1],[44])")
|
||||
|
||||
node2.restart_with_latest_version()
|
||||
|
||||
# Check that after server upgrade aggregate function is serialized according to older version.
|
||||
upgraded_server_data = node2.query("select finalizeAggregation(col3) from default.test_table;").strip()
|
||||
assert(upgraded_server_data == old_server_data)
|
||||
assert(upgraded_server_data == "([1],[44])")
|
||||
|
||||
# Remote fetches are still with older version.
|
||||
data_from_upgraded_to_new_server = node1.query("select finalizeAggregation(col3) from remote('node2', default.test_table);").strip()
|
||||
assert(data_from_upgraded_to_new_server == upgraded_server_data == "([1],[44])")
|
||||
|
||||
# Check it is ok to write into table with older version of aggregate function.
|
||||
insert_data(node2)
|
||||
|
||||
# Hm, should newly inserted data be serialized as old version?
|
||||
upgraded_server_data = node2.query("select finalizeAggregation(col3) from default.test_table;").strip()
|
||||
assert(upgraded_server_data == "([1],[300])\n([1],[44])")
|
||||
|
||||
|
||||
def test_aggregate_function_versioning_persisting_metadata(start_cluster):
|
||||
for node in [node1, node2]:
|
||||
create_and_fill_table(node)
|
||||
node2.restart_with_latest_version()
|
||||
|
||||
for node in [node1, node2]:
|
||||
node.query("DETACH TABLE test_table")
|
||||
node.query("ATTACH TABLE test_table")
|
||||
|
||||
for node in [node1, node2]:
|
||||
insert_data(node)
|
||||
|
||||
new_server_data = node1.query("select finalizeAggregation(col3) from default.test_table;").strip()
|
||||
assert(new_server_data == "([1],[300])\n([1],[300])")
|
||||
upgraded_server_data = node2.query("select finalizeAggregation(col3) from default.test_table;").strip()
|
||||
assert(upgraded_server_data == "([1],[44])\n([1],[44])")
|
||||
|
||||
for node in [node1, node2]:
|
||||
node.restart_clickhouse()
|
||||
insert_data(node)
|
||||
|
||||
result = node1.query("select finalizeAggregation(col3) from remote('127.0.0.{1,2}', default.test_table);").strip()
|
||||
assert(result == "([1],[300])\n([1],[300])\n([1],[300])\n([1],[300])")
|
||||
result = node2.query("select finalizeAggregation(col3) from remote('127.0.0.{1,2}', default.test_table);").strip()
|
||||
assert(result == "([1],[44])\n([1],[44])\n([1],[44])\n([1],[44])")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user