From 19025fb4ff4b70a1f5a5a202f3f4eaf1f0132b07 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 15 Dec 2024 22:35:43 +0000 Subject: [PATCH] try to avoid field operations by providing new method in IColumn --- src/Analyzer/ConstantNode.cpp | 31 +++++++++- src/Analyzer/ConstantNode.h | 10 ++-- src/Analyzer/ConstantValue.h | 18 +----- src/Columns/ColumnAggregateFunction.cpp | 15 +++++ src/Columns/ColumnAggregateFunction.h | 2 + src/Columns/ColumnArray.cpp | 23 ++++++++ src/Columns/ColumnArray.h | 1 + src/Columns/ColumnCompressed.h | 1 + src/Columns/ColumnConst.h | 5 ++ src/Columns/ColumnDecimal.h | 6 ++ src/Columns/ColumnDynamic.cpp | 16 +++++ src/Columns/ColumnDynamic.h | 2 + src/Columns/ColumnFixedString.h | 10 ++++ src/Columns/ColumnFunction.cpp | 22 +++++++ src/Columns/ColumnFunction.h | 2 + src/Columns/ColumnLowCardinality.h | 4 ++ src/Columns/ColumnMap.cpp | 25 ++++++++ src/Columns/ColumnMap.h | 1 + src/Columns/ColumnNullable.cpp | 10 ++++ src/Columns/ColumnNullable.h | 1 + src/Columns/ColumnObject.cpp | 77 +++++++++++++++++++++++++ src/Columns/ColumnObject.h | 1 + src/Columns/ColumnObjectDeprecated.cpp | 54 +++++++++++++++++ src/Columns/ColumnObjectDeprecated.h | 2 + src/Columns/ColumnSparse.cpp | 5 ++ src/Columns/ColumnSparse.h | 1 + src/Columns/ColumnString.h | 9 +++ src/Columns/ColumnTuple.cpp | 23 ++++++++ src/Columns/ColumnTuple.h | 1 + src/Columns/ColumnUnique.h | 4 ++ src/Columns/ColumnVariant.cpp | 11 ++++ src/Columns/ColumnVariant.h | 1 + src/Columns/ColumnVector.h | 9 +++ src/Columns/IColumn.h | 4 ++ src/Columns/IColumnDummy.cpp | 5 ++ src/Columns/IColumnDummy.h | 1 + src/Common/FieldVisitorToString.cpp | 16 +++++ src/Common/FieldVisitorToString.h | 4 ++ src/DataTypes/FieldToDataType.cpp | 28 +++++++++ src/DataTypes/FieldToDataType.h | 4 ++ src/Planner/PlannerActionsVisitor.cpp | 7 ++- 41 files changed, 444 insertions(+), 28 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index b764fedea6c..592d10f2395 
100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -102,10 +102,35 @@ bool ConstantNode::requiresCastCall(Field::Types::Which type, const DataTypePtr return need_to_add_cast_function; } -bool ConstantNode::requiresCastCall() const +bool ConstantNode::requiresCastCall(const DataTypePtr & field_type, const DataTypePtr & data_type) { - const auto & [name, type, field_type] = getFieldAttributes(); - return requiresCastCall(type, field_type, getResultType()); + bool need_to_add_cast_function = false; + WhichDataType constant_value_literal_type(field_type); + WhichDataType constant_value_type(data_type); + + switch (constant_value_literal_type.idx) + { + case TypeIndex::String: + { + need_to_add_cast_function = !constant_value_type.isString(); + break; + } + case TypeIndex::UInt64: + case TypeIndex::Int64: + case TypeIndex::Float64: + { + WhichDataType constant_value_field_type(field_type); + need_to_add_cast_function = constant_value_field_type.idx != constant_value_type.idx; + break; + } + default: + { + need_to_add_cast_function = true; + break; + } + } + + return need_to_add_cast_function; } bool ConstantNode::receivedFromInitiatorServer() const diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 2044e2c88f5..c0392ff82df 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -92,10 +92,9 @@ public: return constant_value.getType(); } - static bool requiresCastCall(Field::Types::Which type, const DataTypePtr & field_type, const DataTypePtr & data_type); - /// Check if conversion to AST requires wrapping with _CAST function. - bool requiresCastCall() const; + static bool requiresCastCall(Field::Types::Which type, const DataTypePtr & field_type, const DataTypePtr & data_type); + static bool requiresCastCall(const DataTypePtr & field_type, const DataTypePtr & data_type); /// Check if constant is a result of _CAST function constant folding. 
bool receivedFromInitiatorServer() const; @@ -109,10 +108,9 @@ public: void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; - std::tuple getFieldAttributes() const + std::pair getValueNameAndType() const { - const auto & [name, type, field_type] = constant_value.getFieldAttributes(); - return {name + "_" + constant_value.getType()->getName(), type, field_type}; + return constant_value.getValueNameAndType(); } protected: diff --git a/src/Analyzer/ConstantValue.h b/src/Analyzer/ConstantValue.h index 1b99a7a01fc..b1402b46ac5 100644 --- a/src/Analyzer/ConstantValue.h +++ b/src/Analyzer/ConstantValue.h @@ -21,7 +21,6 @@ public: ConstantValue(const Field & field_, DataTypePtr data_type_) : column(data_type_->createColumnConst(1, field_)) , data_type(std::move(data_type_)) - , field_cache(applyVisitor(FieldVisitorToString(), field_), field_.getType(), applyVisitor(FieldToDataType(), field_)) {} const ColumnPtr & getColumn() const @@ -34,21 +33,9 @@ public: return data_type; } - const std::tuple & getFieldAttributes() const & + std::pair getValueNameAndType() const { - if (std::get(field_cache).empty()) - { - Field field; - column->get(0, field); - field_cache = {applyVisitor(FieldVisitorToString(), field), field.getType(), applyVisitor(FieldToDataType(), field)}; - } - - return field_cache; - } - - std::tuple getFieldAttributes() const && - { - return getFieldAttributes(); + return column->getValueNameAndType(0); } private: @@ -62,7 +49,6 @@ private: ColumnPtr column; DataTypePtr data_type; - mutable std::tuple field_cache; }; } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 69bea288779..71bc3615a7d 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -1,4 +1,6 @@ +#include #include +#include #include #include @@ -470,6 +472,19 @@ void ColumnAggregateFunction::get(size_t n, Field & res) const res = operator[](n); } +std::pair 
ColumnAggregateFunction::getValueNameAndType(size_t n) const +{ + String state; + { + WriteBufferFromOwnString buffer; + func->serialize(data[n], buffer, version); + WriteBufferFromString wb(state); + writeQuoted(buffer.str(), wb); + } + + return {state, DataTypeFactory::instance().get(type_string)}; +} + StringRef ColumnAggregateFunction::getDataAt(size_t n) const { return StringRef(reinterpret_cast(&data[n]), sizeof(data[n])); diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index b581c3ba3b4..3252a66f3fa 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -136,6 +136,8 @@ public: void get(size_t n, Field & res) const override; + virtual std::pair getValueNameAndType(size_t n) const override; + bool isDefaultAt(size_t) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method isDefaultAt is not supported for ColumnAggregateFunction"); diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 0c6d7c4e5c6..dec7c5ec46b 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -148,6 +150,27 @@ void ColumnArray::get(size_t n, Field & res) const res_arr.push_back(getData()[offset + i]); } +std::pair ColumnArray::getValueNameAndType(size_t n) const +{ + size_t offset = offsetAt(n); + size_t size = sizeAt(n); + + String value_name {"["}; + DataTypes element_types; + element_types.reserve(size); + + for (size_t i = 0; i < size; ++i) + { + const auto & [value, type] = getData().getValueNameAndType(offset + i); + element_types.push_back(type); + if (i > 0) + value_name += ", "; + value_name += value; + } + value_name += "]"; + + return {value_name, std::make_shared(getLeastSupertype(element_types))}; +} StringRef ColumnArray::getDataAt(size_t n) const { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index a66f9041213..2fc0b97d4de 100644 --- 
a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -74,6 +74,7 @@ public: size_t size() const override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + virtual std::pair getValueNameAndType(size_t n) const override; StringRef getDataAt(size_t n) const override; bool isDefaultAt(size_t n) const override; void insertData(const char * pos, size_t length) override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index c4270e8216b..396315ab4c4 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -82,6 +82,7 @@ public: TypeIndex getDataType() const override { throwMustBeDecompressed(); } Field operator[](size_t) const override { throwMustBeDecompressed(); } void get(size_t, Field &) const override { throwMustBeDecompressed(); } + virtual std::pair getValueNameAndType(size_t) const override { throwMustBeDecompressed(); } StringRef getDataAt(size_t) const override { throwMustBeDecompressed(); } bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index ca38e76ea57..5d4991d6bff 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -78,6 +78,11 @@ public: data->get(0, res); } + virtual std::pair getValueNameAndType(size_t) const override + { + return data->getValueNameAndType(0); + } + StringRef getDataAt(size_t) const override { return data->getDataAt(0); diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 6f8360a54dd..fb55b4dc823 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -121,6 +123,10 @@ public: Field operator[](size_t n) const override { return DecimalField(data[n], scale); } void get(size_t n, Field & res) const override { res = (*this)[n]; } + 
std::pair getValueNameAndType(size_t n) const override + { + return {FieldVisitorToString()(data[n], scale), FieldToDataType()(data[n], scale)}; + } bool getBool(size_t n) const override { return bool(data[n].value); } Int64 getInt(size_t n) const override { return Int64(data[n].value); } UInt64 get64(size_t n) const override; diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 6eb22a8bdf7..94639d1e618 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -303,6 +303,22 @@ void ColumnDynamic::get(size_t n, Field & res) const type->getDefaultSerialization()->deserializeBinary(res, buf, getFormatSettings()); } +std::pair ColumnDynamic::getValueNameAndType(size_t n) const +{ + const auto & variant_col = getVariantColumn(); + /// Check if value is not in shared variant. + if (variant_col.globalDiscriminatorAt(n) != getSharedVariantDiscriminator()) + return variant_col.getValueNameAndType(n); + + /// We should deserialize value from shared variant. 
+ const auto & shared_variant = getSharedVariant(); + auto value_data = shared_variant.getDataAt(variant_col.offsetAt(n)); + ReadBufferFromMemory buf(value_data.data, value_data.size); + auto type = decodeDataType(buf); + const auto col = type->createColumn(); + type->getDefaultSerialization()->deserializeBinary(*col, buf, getFormatSettings()); + return col->getValueNameAndType(0); +} #if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnDynamic::insertFrom(const IColumn & src_, size_t n) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index fbab4d5da4c..7491986ec1d 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -143,6 +143,8 @@ public: void get(size_t n, Field & res) const override; + virtual std::pair getValueNameAndType(size_t n) const override; + bool isDefaultAt(size_t n) const override { return variant_column_ptr->isDefaultAt(n); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 8cf0a6a57da..867fc56feac 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -1,5 +1,8 @@ #pragma once +#include +#include +#include #include #include #include @@ -87,6 +90,13 @@ public: res = std::string_view{reinterpret_cast(&chars[n * index]), n}; } + std::pair getValueNameAndType(size_t index) const override + { + WriteBufferFromOwnString buf; + writeQuoted(std::string_view{reinterpret_cast(&chars[n * index]), n}, buf); + return {buf.str(), std::make_shared()}; + } + StringRef getDataAt(size_t index) const override { return StringRef(&chars[n * index], n); diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index cc80d04444e..ea9c960c39f 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -91,6 +92,27 @@ void ColumnFunction::get(size_t n, Field & res) const res_tuple.push_back((*captured_columns[i].column)[n]); } +std::pair 
ColumnFunction::getValueNameAndType(size_t n) const +{ + size_t size = captured_columns.size(); + + String value_name {size > 1 ? "(" : "tuple("}; + DataTypes element_types; + element_types.reserve(size); + + for (size_t i = 0; i < size; ++i) + { + const auto & [value, type] = captured_columns[i].column->getValueNameAndType(n); + element_types.push_back(type); + if (i > 0) + value_name += ", "; + value_name += value; + } + value_name += ")"; + + return {value_name, std::make_shared(element_types)}; +} + #if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnFunction::insertFrom(const IColumn & src, size_t n) diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 8df9e23c0e8..a8e7016c01c 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -64,6 +64,8 @@ public: void get(size_t n, Field & res) const override; + std::pair getValueNameAndType(size_t n) const override; + StringRef getDataAt(size_t) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get value from {}", getName()); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 3cc1c8919c0..99b171a411e 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -58,6 +58,10 @@ public: Field operator[](size_t n) const override { return getDictionary()[getIndexes().getUInt(n)]; } void get(size_t n, Field & res) const override { getDictionary().get(getIndexes().getUInt(n), res); } + std::pair getValueNameAndType(size_t n) const override + { + return getDictionary().getValueNameAndType(getIndexes().getUInt(n)); + } StringRef getDataAt(size_t n) const override { return getDictionary().getDataAt(getIndexes().getUInt(n)); } diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index a5511dfeeb4..ed340e9e7d0 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -79,6 +81,29 @@ void 
ColumnMap::get(size_t n, Field & res) const map.push_back(getNestedData()[offset + i]); } +std::pair ColumnMap::getValueNameAndType(size_t n) const +{ + const auto & offsets = getNestedColumn().getOffsets(); + size_t offset = offsets[n - 1]; + size_t size = offsets[n] - offsets[n - 1]; + + String value_name {"["}; + DataTypes element_types; + element_types.reserve(size); + + for (size_t i = 0; i < size; ++i) + { + const auto & [value, type] = getNestedData().getValueNameAndType(offset + i); + element_types.push_back(type); + if (i > 0) + value_name += ", "; + value_name += value; + } + value_name += "]"; + + return {value_name, std::make_shared(getLeastSupertype(element_types))}; +} + bool ColumnMap::isDefaultAt(size_t n) const { return nested->isDefaultAt(n); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 8dfa5bb5845..ade5b0ffaac 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -51,6 +51,7 @@ public: Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + std::pair getValueNameAndType(size_t n) const override; bool isDefaultAt(size_t n) const override; StringRef getDataAt(size_t n) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 6e8bd3fc70c..4cd738ed98d 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -115,6 +117,14 @@ void ColumnNullable::get(size_t n, Field & res) const getNestedColumn().get(n, res); } +std::pair ColumnNullable::getValueNameAndType(size_t n) const +{ + if (isNullAt(n)) + return {"NULL", std::make_shared(std::make_shared())}; + + return getNestedColumn().getValueNameAndType(n); +} + Float64 ColumnNullable::getFloat64(size_t n) const { if (isNullAt(n)) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 32ce66c5965..f522c8b9241 100644 --- a/src/Columns/ColumnNullable.h +++ 
b/src/Columns/ColumnNullable.h @@ -55,6 +55,7 @@ public: bool isNullAt(size_t n) const override { return assert_cast(*null_map).getData()[n] != 0;} Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + std::pair getValueNameAndType(size_t n) const override; bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); } UInt64 get64(size_t n) const override { return nested_column->get64(n); } Float64 getFloat64(size_t n) const override; diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index f4121435be9..d7607cb233d 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -265,6 +267,81 @@ void ColumnObject::get(size_t n, Field & res) const res = (*this)[n]; } +std::pair ColumnObject::getValueNameAndType(size_t n) const +{ + WriteBufferFromOwnString wb; + wb << '{'; + + bool first = true; + + for (const auto & [path, column] : typed_paths) + { + const auto & [value, type] = column->getValueNameAndType(n); + + if (first) + first = false; + else + wb << ", "; + + writeDoubleQuoted(path, wb); + wb << ": " << value; + } + + for (const auto & [path, column] : dynamic_paths_ptrs) + { + /// Output only non-null values from dynamic paths. We cannot distinguish cases when + /// dynamic path has Null value and when it's absent in the row and consider them equivalent. 
+ if (column->isNullAt(n)) + continue; + + const auto & [value, type] = column->getValueNameAndType(n); + + if (first) + first = false; + else + wb << ", "; + + writeDoubleQuoted(path, wb); + wb << ": " << value; + } + + const auto & shared_data_offsets = getSharedDataOffsets(); + const auto [shared_paths, shared_values] = getSharedDataPathsAndValues(); + size_t start = shared_data_offsets[static_cast(n) - 1]; + size_t end = shared_data_offsets[n]; + for (size_t i = start; i != end; ++i) + { + if (first) + first = false; + else + wb << ", "; + + String path = shared_paths->getDataAt(i).toString(); + writeDoubleQuoted(path, wb); + + auto value_data = shared_values->getDataAt(i); + ReadBufferFromMemory buf(value_data.data, value_data.size); + auto decoded_type = decodeDataType(buf); + + if (isNothing(decoded_type)) + { + wb << ": NULL"; + continue; + } + + const auto column = decoded_type->createColumn(); + decoded_type->getDefaultSerialization()->deserializeBinary(*column, buf, getFormatSettings()); + + const auto & [value, type] = column->getValueNameAndType(n); + + wb << ": " << value; + } + + wb << "}"; + + return {wb.str(), std::make_shared(DataTypeObject::SchemaFormat::JSON)}; +} + bool ColumnObject::isDefaultAt(size_t n) const { for (const auto & [path, column] : typed_paths) diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 7b8a381d571..6e398d0b6d0 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -107,6 +107,7 @@ public: Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + std::pair getValueNameAndType(size_t n) const override; bool isDefaultAt(size_t n) const override; StringRef getDataAt(size_t n) const override; diff --git a/src/Columns/ColumnObjectDeprecated.cpp b/src/Columns/ColumnObjectDeprecated.cpp index d03b1d0df82..79657acd9bf 100644 --- a/src/Columns/ColumnObjectDeprecated.cpp +++ b/src/Columns/ColumnObjectDeprecated.cpp @@ -1,3 +1,6 @@ +#include +#include 
+#include #include #include #include @@ -237,6 +240,33 @@ void ColumnObjectDeprecated::Subcolumn::get(size_t n, Field & res) const throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } +std::pair ColumnObjectDeprecated::Subcolumn::getValueNameAndType(size_t n) const +{ + if (isFinalized()) + return getFinalizedColumn().getValueNameAndType(n); + + size_t ind = n; + if (ind < num_of_defaults_in_prefix) + return least_common_type.get()->createColumnConstWithDefaultValue(1)->getValueNameAndType(0); + + ind -= num_of_defaults_in_prefix; + for (const auto & part : data) + { + if (ind < part->size()) + { + Field field; + part->get(ind, field); + const auto column = least_common_type.get()->createColumn(); + column->insert(convertFieldToTypeOrThrow(field, *least_common_type.get())); + return column->getValueNameAndType(0); + } + + ind -= part->size(); + } + + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); +} + void ColumnObjectDeprecated::Subcolumn::checkTypes() const { DataTypes prefix_types; @@ -763,6 +793,30 @@ void ColumnObjectDeprecated::get(size_t n, Field & res) const } } +std::pair ColumnObjectDeprecated::getValueNameAndType(size_t n) const +{ + WriteBufferFromOwnString wb; + wb << '{'; + + bool first = true; + + for (const auto & entry : subcolumns) + { + if (first) + first = false; + else + wb << ", "; + + writeDoubleQuoted(entry->path.getPath(), wb); + const auto & [value, type] = entry->data.getValueNameAndType(n); + wb << ": " << value; + } + + wb << "}"; + + return {wb.str(), std::make_shared(DataTypeObject::SchemaFormat::JSON)}; +} + #if !defined(DEBUG_OR_SANITIZER_BUILD) void ColumnObjectDeprecated::insertFrom(const IColumn & src, size_t n) #else diff --git a/src/Columns/ColumnObjectDeprecated.h b/src/Columns/ColumnObjectDeprecated.h index 29e2d8f0709..f3e90d89b9c 100644 --- a/src/Columns/ColumnObjectDeprecated.h +++ 
b/src/Columns/ColumnObjectDeprecated.h @@ -69,6 +69,7 @@ public: size_t byteSize() const; size_t allocatedBytes() const; void get(size_t n, Field & res) const; + std::pair getValueNameAndType(size_t n) const; bool isFinalized() const; const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); } @@ -220,6 +221,7 @@ public: void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + std::pair getValueNameAndType(size_t n) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index a0e47e65fc6..ee82f4f6416 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -94,6 +94,11 @@ void ColumnSparse::get(size_t n, Field & res) const values->get(getValueIndex(n), res); } +std::pair ColumnSparse::getValueNameAndType(size_t n) const +{ + return values->getValueNameAndType(getValueIndex(n)); +} + bool ColumnSparse::getBool(size_t n) const { return values->getBool(getValueIndex(n)); diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 619dce63c1e..835dee80025 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -65,6 +65,7 @@ public: bool isNullAt(size_t n) const override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + virtual std::pair getValueNameAndType(size_t) const override; bool getBool(size_t n) const override; Float64 getFloat64(size_t n) const override; Float32 getFloat32(size_t n) const override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c2371412437..043d22cac50 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -2,6 +2,8 @@ #include +#include +#include #include #include #include @@ -109,6 +111,13 @@ public: res = 
std::string_view{reinterpret_cast(&chars[offsetAt(n)]), sizeAt(n) - 1}; } + std::pair getValueNameAndType(size_t n) const override + { + WriteBufferFromOwnString wb; + writeQuoted(std::string_view{reinterpret_cast(&chars[offsetAt(n)]), sizeAt(n) - 1}, wb); + return {wb.str(), std::make_shared()}; + } + StringRef getDataAt(size_t n) const override { chassert(n < size()); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 28e5f03cc3c..80f567b7d38 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -148,6 +149,28 @@ void ColumnTuple::get(size_t n, Field & res) const res_tuple.push_back((*columns[i])[n]); } +std::pair ColumnTuple::getValueNameAndType(size_t n) const +{ + const size_t tuple_size = columns.size(); + + String value_name {tuple_size > 1 ? "(" : "tuple("}; + + DataTypes element_types; + element_types.reserve(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) + { + const auto & [value, type] = columns[i]->getValueNameAndType(n); + element_types.push_back(type); + if (i > 0) + value_name += ", "; + value_name += value; + } + value_name += ")"; + + return {value_name, std::make_shared(element_types)}; +} + bool ColumnTuple::isDefaultAt(size_t n) const { const size_t tuple_size = columns.size(); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index d5eee911edc..fda9743e178 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -59,6 +59,7 @@ public: Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + virtual std::pair getValueNameAndType(size_t n) const override; bool isDefaultAt(size_t n) const override; StringRef getDataAt(size_t n) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index ffa7c311e9e..4784ee3a09a 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -74,6 +74,10 @@ public: Field operator[](size_t n) const 
override { return (*getNestedColumn())[n]; } void get(size_t n, Field & res) const override { getNestedColumn()->get(n, res); } + std::pair getValueNameAndType(size_t n) const override + { + return getNestedColumn()->getValueNameAndType(n); + } bool isDefaultAt(size_t n) const override { return n == 0; } StringRef getDataAt(size_t n) const override { return getNestedColumn()->getDataAt(n); } UInt64 get64(size_t n) const override { return getNestedColumn()->get64(n); } diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 2fa59b8e33c..a576b0c4ee9 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1,3 +1,5 @@ +#include +#include #include #include @@ -404,6 +406,15 @@ void ColumnVariant::get(size_t n, Field & res) const variants[discr]->get(offsetAt(n), res); } +std::pair ColumnVariant::getValueNameAndType(size_t n) const +{ + Discriminator discr = localDiscriminatorAt(n); + if (discr == NULL_DISCRIMINATOR) + return {"NULL", std::make_shared(std::make_shared())}; + + return variants[discr]->getValueNameAndType(offsetAt(n)); +} + bool ColumnVariant::isDefaultAt(size_t n) const { return localDiscriminatorAt(n) == NULL_DISCRIMINATOR; diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index a68a961169c..41d8079e8a4 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -173,6 +173,7 @@ public: Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + virtual std::pair getValueNameAndType(size_t n) const override; bool isDefaultAt(size_t n) const override; bool isNullAt(size_t n) const override; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 8f81da86375..9d48b01709a 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -207,6 +209,13 @@ public: res = (*this)[n]; } + std::pair getValueNameAndType(size_t n) const 
override + { + assert(n < data.size()); /// This assert is more strict than the corresponding assert inside PODArray. + const auto & val = castToNearestFieldType(data[n]); + return {FieldVisitorToString()(val), FieldToDataType()(val)}; + } + UInt64 get64(size_t n) const override; Float64 getFloat64(size_t n) const override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index c77b089812e..7c0c5b709d0 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -34,6 +34,8 @@ class ColumnGathererStream; class Field; class WeakHash32; class ColumnConst; +class IDataType; +using DataTypePtr = std::shared_ptr; /// A range of column values between row indexes `from` and `to`. The name "equal range" is due to table sorting as its main use case: With /// a PRIMARY KEY (c_pk1, c_pk2, ...), the first PK column is fully sorted. The second PK column is sorted within equal-value runs of the @@ -144,6 +146,8 @@ public: /// Like the previous one, but avoids extra copying if Field is in a container, for example. virtual void get(size_t n, Field & res) const = 0; + virtual std::pair getValueNameAndType(size_t) const = 0; + /// If possible, returns pointer to memory chunk which contains n-th element (if it isn't possible, throws an exception) /// Is used to optimize some computations (in aggregation, for example). 
[[nodiscard]] virtual StringRef getDataAt(size_t n) const = 0; diff --git a/src/Columns/IColumnDummy.cpp b/src/Columns/IColumnDummy.cpp index 5b220a4eefd..1d1d9f53cec 100644 --- a/src/Columns/IColumnDummy.cpp +++ b/src/Columns/IColumnDummy.cpp @@ -25,6 +25,11 @@ void IColumnDummy::get(size_t, Field &) const throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get value from {}", getName()); } +std::pair IColumnDummy::getValueNameAndType(size_t) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get value name and type from {}", getName()); +} + void IColumnDummy::insert(const Field &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert element into {}", getName()); diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 40d410e207d..df8e0319c60 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -40,6 +40,7 @@ public: Field operator[](size_t) const override; void get(size_t, Field &) const override; + virtual std::pair getValueNameAndType(size_t n) const override; void insert(const Field &) override; bool tryInsert(const Field &) override { return false; } bool isDefaultAt(size_t) const override; diff --git a/src/Common/FieldVisitorToString.cpp b/src/Common/FieldVisitorToString.cpp index 2148bac20d1..937e261d0cd 100644 --- a/src/Common/FieldVisitorToString.cpp +++ b/src/Common/FieldVisitorToString.cpp @@ -51,6 +51,18 @@ static inline String formatQuoted(T x) return wb.str(); } +template +static inline String formatQuoted(const Decimal & x, UInt32 scale) +{ + WriteBufferFromOwnString wb; + + writeChar('\'', wb); + writeText(x, scale, wb, {}); + writeChar('\'', wb); + + return wb.str(); +} + /** In contrast to writeFloatText (and writeQuoted), * even if number looks like integer after formatting, prints decimal point nevertheless (for example, Float64(1) is printed as 1.). * - because resulting text must be able to be parsed back as Float64 by query parser (otherwise it will be parsed as integer). 
@@ -81,6 +93,10 @@ String FieldVisitorToString::operator() (const DecimalField & x) cons String FieldVisitorToString::operator() (const DecimalField & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const DecimalField & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const DecimalField & x) const { return formatQuoted(x); } +String FieldVisitorToString::operator() (const Decimal32 & x, UInt32 scale) const { return formatQuoted(x, scale); } +String FieldVisitorToString::operator() (const Decimal64 & x, UInt32 scale) const { return formatQuoted(x, scale); } +String FieldVisitorToString::operator() (const Decimal128 & x, UInt32 scale) const { return formatQuoted(x, scale); } +String FieldVisitorToString::operator() (const Decimal256 & x, UInt32 scale) const { return formatQuoted(x, scale); } String FieldVisitorToString::operator() (const Int128 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const UInt128 & x) const { return formatQuoted(x); } String FieldVisitorToString::operator() (const UInt256 & x) const { return formatQuoted(x); } diff --git a/src/Common/FieldVisitorToString.h b/src/Common/FieldVisitorToString.h index ea4b43ead08..8f603b1eac2 100644 --- a/src/Common/FieldVisitorToString.h +++ b/src/Common/FieldVisitorToString.h @@ -29,6 +29,10 @@ public: String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; String operator() (const DecimalField & x) const; + String operator() (const Decimal32 & x, UInt32 scale) const; + String operator() (const Decimal64 & x, UInt32 scale) const; + String operator() (const Decimal128 & x, UInt32 scale) const; + String operator() (const Decimal256 & x, UInt32 scale) const; String operator() (const AggregateFunctionStateData & x) const; String operator() (const CustomType & x) const; String operator() (const bool & x) const; diff --git a/src/DataTypes/FieldToDataType.cpp 
b/src/DataTypes/FieldToDataType.cpp index 536d2656021..47a4629b85d 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -130,6 +130,34 @@ DataTypePtr FieldToDataType::operator() (const DecimalField(Type::maxPrecision(), x.getScale()); } +template +DataTypePtr FieldToDataType::operator() (const Decimal32 &, UInt32 scale) const +{ + using Type = DataTypeDecimal; + return std::make_shared(Type::maxPrecision(), scale); +} + +template +DataTypePtr FieldToDataType::operator() (const Decimal64 &, UInt32 scale) const +{ + using Type = DataTypeDecimal; + return std::make_shared(Type::maxPrecision(), scale); +} + +template +DataTypePtr FieldToDataType::operator() (const Decimal128 &, UInt32 scale) const +{ + using Type = DataTypeDecimal; + return std::make_shared(Type::maxPrecision(), scale); +} + +template +DataTypePtr FieldToDataType::operator() (const Decimal256 &, UInt32 scale) const +{ + using Type = DataTypeDecimal; + return std::make_shared(Type::maxPrecision(), scale); +} + template DataTypePtr FieldToDataType::operator() (const Array & x) const { diff --git a/src/DataTypes/FieldToDataType.h b/src/DataTypes/FieldToDataType.h index 8febadc1a0d..2c89f74e0b0 100644 --- a/src/DataTypes/FieldToDataType.h +++ b/src/DataTypes/FieldToDataType.h @@ -40,6 +40,10 @@ public: DataTypePtr operator() (const DecimalField & x) const; DataTypePtr operator() (const DecimalField & x) const; DataTypePtr operator() (const DecimalField & x) const; + DataTypePtr operator() (const Decimal32 & x, UInt32 scale) const; + DataTypePtr operator() (const Decimal64 & x, UInt32 scale) const; + DataTypePtr operator() (const Decimal128 & x, UInt32 scale) const; + DataTypePtr operator() (const Decimal256 & x, UInt32 scale) const; DataTypePtr operator() (const AggregateFunctionStateData & x) const; DataTypePtr operator() (const CustomType & x) const; DataTypePtr operator() (const UInt256 & x) const; diff --git a/src/Planner/PlannerActionsVisitor.cpp 
b/src/Planner/PlannerActionsVisitor.cpp index e8b15c0d0c4..3f151e5d798 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -60,8 +60,8 @@ namespace */ String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_node) { - const auto & [name, type, field_type] = constant_node.getFieldAttributes(); - bool requires_cast_call = constant_node.hasSourceExpression() || ConstantNode::requiresCastCall(type, field_type, constant_node.getResultType()); + const auto & [name, type] = constant_node.getValueNameAndType(); + bool requires_cast_call = constant_node.hasSourceExpression() || ConstantNode::requiresCastCall(type, constant_node.getResultType()); WriteBufferFromOwnString buffer; if (requires_cast_call) @@ -336,7 +336,8 @@ public: static String calculateConstantActionNodeName(const ConstantNode & constant_node) { - return std::get(constant_node.getFieldAttributes()); + const auto & [name, type] = constant_node.getValueNameAndType(); + return name + "_" + constant_node.getResultType()->getName(); } String calculateWindowNodeActionName(const QueryTreeNodePtr & function_nodew_node_, const QueryTreeNodePtr & window_node_)