Merge remote-tracking branch 'origin/master' into filecache-better-try-reserve

This commit is contained in:
kssenii 2024-03-17 15:26:52 +01:00
commit 13c10fce1a
37 changed files with 1201 additions and 518 deletions

View File

@ -55,7 +55,7 @@ CREATE TABLE criteo_log (
) ENGINE = Log;
```
Download the data:
Insert the data:
``` bash
$ for i in {00..23}; do echo $i; zcat datasets/criteo/day_${i#0}.gz | sed -r 's/^/2000-01-'${i/00/24}'\t/' | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"; done

View File

@ -9,7 +9,7 @@
#include <Common/ProfilingScopedRWLock.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>
#include <QueryPipeline/QueryPipelineBuilder.h>

View File

@ -0,0 +1,42 @@
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <Core/Block.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <QueryPipeline/QueryPipeline.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Creates exactly one of the two underlying executors over `pipeline_`:
/// the asynchronous one when `async` is set, the synchronous one otherwise.
DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async)
{
    /// Both unique_ptr members start out null; only the selected one gets populated.
    if (async)
        async_executor = std::make_unique<PullingAsyncPipelineExecutor>(pipeline_);
    else
        executor = std::make_unique<PullingPipelineExecutor>(pipeline_);
}
/// Pulls the next block from whichever executor was created by the constructor.
/// Returns the executor's "has data" flag; throws LOGICAL_ERROR when neither executor exists.
bool DictionaryPipelineExecutor::pull(Block & block)
{
    if (!async_executor)
    {
        if (!executor)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized");

        return executor->pull(block);
    }

    /// A pull that reports data but leaves `block` empty is retried until either
    /// a non-empty block arrives or the executor reports that there is no more data.
    bool has_data = async_executor->pull(block);
    while (has_data && !block)
        has_data = async_executor->pull(block);

    return has_data;
}
/// Defaulted out of line: the header only forward-declares both executor types, while this
/// translation unit includes their full definitions, which destroying the unique_ptr members requires.
DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default;
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <memory>
namespace DB
{
class Block;
class QueryPipeline;
class PullingAsyncPipelineExecutor;
class PullingPipelineExecutor;
/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor
class DictionaryPipelineExecutor
{
public:
    /// `async` selects which of the two executors is created over `pipeline_`.
    DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async);

    /// Only declared here; the executor types are incomplete in this header.
    ~DictionaryPipelineExecutor();

    /// Fetches the next block into `block` and returns the executor's "has data" flag.
    bool pull(Block & block);

private:
    /// Exactly one of the two pointers is expected to be non-null after construction.
    std::unique_ptr<PullingAsyncPipelineExecutor> async_executor;
    std::unique_ptr<PullingPipelineExecutor> executor;
};
}

View File

@ -9,15 +9,11 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/SettingsChanges.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
@ -135,29 +131,4 @@ String TransformWithAdditionalColumns::getName() const
return "TransformWithAdditionalColumns";
}
/// Creates exactly one of the two underlying executors over `pipeline_`:
/// the asynchronous one when `async` is set, the synchronous one otherwise.
DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async)
{
    /// Both unique_ptr members start out null; only the selected one gets populated.
    if (async)
        async_executor = std::make_unique<PullingAsyncPipelineExecutor>(pipeline_);
    else
        executor = std::make_unique<PullingPipelineExecutor>(pipeline_);
}
/// Pulls the next block from whichever executor was created by the constructor.
/// Returns the executor's "has data" flag; throws LOGICAL_ERROR when neither executor exists.
bool DictionaryPipelineExecutor::pull(Block & block)
{
    if (!async_executor)
    {
        if (!executor)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized");

        return executor->pull(block);
    }

    /// A pull that reports data but leaves `block` empty is retried until either
    /// a non-empty block arrives or the executor reports that there is no more data.
    bool has_data = async_executor->pull(block);
    while (has_data && !block)
        has_data = async_executor->pull(block);

    return has_data;
}
/// Defaulted out of line so that the unique_ptr members are destroyed where both executor types are complete.
DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default;
}

View File

@ -16,10 +16,6 @@ namespace DB
struct DictionaryStructure;
class SettingsChanges;
class PullingPipelineExecutor;
class PullingAsyncPipelineExecutor;
class QueryPipeline;
/// For simple key
Block blockForIds(
@ -55,17 +51,4 @@ private:
size_t current_range_index = 0;
};
/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor
class DictionaryPipelineExecutor
{
public:
    /// `async` selects which of the two executors is created over `pipeline_`.
    DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async);

    /// Only declared here; the executor types are incomplete in this header.
    ~DictionaryPipelineExecutor();

    /// Fetches the next block into `block` and returns the executor's "has data" flag.
    bool pull(Block & block);

private:
    /// Exactly one of the two pointers is expected to be non-null after construction.
    std::unique_ptr<PullingAsyncPipelineExecutor> async_executor;
    std::unique_ptr<PullingPipelineExecutor> executor;
};
}

View File

@ -41,6 +41,33 @@ enum class AttributeUnderlyingType : TypeIndexUnderlying
#undef map_item
/// Applies the macro `M` once to every type name listed below, i.e. expands to `M(UInt8) M(UInt16) ... M(Array)`.
/// NOTE(review): the list looks like it is meant to stay in sync with the attribute types of
/// AttributeUnderlyingType declared above - confirm when adding or removing a type.
#define CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(UInt128) \
M(UInt256) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256) \
M(Decimal32) \
M(Decimal64) \
M(Decimal128) \
M(Decimal256) \
M(DateTime64) \
M(Float32) \
M(Float64) \
M(UUID) \
M(IPv4) \
M(IPv6) \
M(String) \
M(Array)
/// Min and max lifetimes for a dictionary or its entry
using DictionaryLifetime = ExternalLoadableLifetime;

View File

@ -15,7 +15,7 @@
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>

View File

@ -10,6 +10,7 @@
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>

View File

@ -6,7 +6,7 @@
#include <Dictionaries/DictionaryHelpers.h>
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <Dictionaries/HierarchyDictionariesUtils.h>
#include <Dictionaries/HashedDictionaryCollectionType.h>
#include <Dictionaries/HashedDictionaryCollectionTraits.h>

View File

@ -19,6 +19,7 @@
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Functions/FunctionHelpers.h>

View File

@ -1,6 +1,5 @@
#include "PolygonDictionary.h"
#include <numeric>
#include <cmath>
#include <base/sort.h>
@ -15,7 +14,7 @@
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Dictionaries/DictionaryFactory.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
namespace DB

View File

@ -0,0 +1,225 @@
#include <Dictionaries/RangeHashedDictionary.h>
namespace DB
{
/// Looks up `attribute_name` for every row of `key_columns` and returns the values as a column.
///
/// The last entry of `key_columns` / `key_types` is the range value; it is cast to the type of
/// `dict_struct.range_min` before the lookup. `default_or_filter` holds either
///  - a RefDefault: rows without a match receive a default value (see getItemsImpl), or
///  - a RefFilter: short-circuit mode, the filter is filled by getItemsShortCircuitImpl and the
///    resulting column is cut down to the number of keys found.
/// When the attribute has a null map, the result is wrapped into a ColumnNullable.
template <DictionaryKeyType dictionary_key_type>
ColumnPtr RangeHashedDictionary<dictionary_key_type>::getColumn(
const std::string & attribute_name,
const DataTypePtr & attribute_type,
const Columns & key_columns,
const DataTypes & key_types,
DefaultOrFilter default_or_filter) const
{
bool is_short_circuit = std::holds_alternative<RefFilter>(default_or_filter);
assert(is_short_circuit || std::holds_alternative<RefDefault>(default_or_filter));
/// For complex keys the trailing key type belongs to the range column, so it is dropped before validation.
if (dictionary_key_type == DictionaryKeyType::Complex)
{
auto key_types_copy = key_types;
key_types_copy.pop_back();
dict_struct.validateKeyTypes(key_types_copy);
}
ColumnPtr result;
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, attribute_type);
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
const auto & attribute = attributes[attribute_index];
/// Cast range column to storage type
Columns modified_key_columns = key_columns;
const ColumnPtr & range_storage_column = key_columns.back();
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type);
size_t keys_size = key_columns.front()->size();
/// The null map is only allocated when the attribute stores nullability information.
bool is_attribute_nullable = attribute.is_value_nullable.has_value();
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (is_attribute_nullable)
{
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
/// Called by callOnDictionaryAttributeType below with a tag object whose nested `AttributeType`
/// names the attribute's type; fills `result` for that type.
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if (is_short_circuit)
{
/// Short-circuit mode: only found values end up in the column; `default_mask` records the misses.
/// NOTE(review): the numeric and nullable setters below write at `row`, yet the column and the
/// null map are truncated to `keys_found` afterwards. That is only consistent if
/// getItemsShortCircuitImpl passes a compact (found-only) index as `row` - confirm.
IColumn::Filter & default_mask = std::get<RefFilter>(default_or_filter).get();
size_t keys_found = 0;
if constexpr (std::is_same_v<ValueType, Array>)
{
auto * out = column.get();
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, const Array & value, bool)
{
out->insert(value);
},
default_mask);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
keys_found = getItemsShortCircuitImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, StringRef value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_mask);
else
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, StringRef value, bool)
{
out->insertData(value.data, value.size);
},
default_mask);
}
else
{
auto & out = column->getData();
if (is_attribute_nullable)
keys_found = getItemsShortCircuitImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_mask);
else
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool)
{
out[row] = value;
},
default_mask);
out.resize(keys_found);
}
if (is_attribute_nullable)
vec_null_map_to->resize(keys_found);
}
else
{
/// Default mode: every row produces a value, taken from the extractor when the lookup misses.
const ColumnPtr & default_values_column = std::get<RefDefault>(default_or_filter).get();
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(
dictionary_attribute.null_value, default_values_column);
if constexpr (std::is_same_v<ValueType, Array>)
{
auto * out = column.get();
getItemsImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, const Array & value, bool)
{
out->insert(value);
},
default_value_extractor);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, StringRef value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, StringRef value, bool)
{
out->insertData(value.data, value.size);
},
default_value_extractor);
}
else
{
auto & out = column->getData();
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool)
{
out[row] = value;
},
default_value_extractor);
}
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (is_attribute_nullable)
result = ColumnNullable::create(result, std::move(col_null_map_to));
return result;
}
/// Explicit instantiations of getColumn for both dictionary key types (Simple and Complex).
template
ColumnPtr RangeHashedDictionary<DictionaryKeyType::Simple>::getColumn(
const std::string & attribute_name,
const DataTypePtr & attribute_type,
const Columns & key_columns,
const DataTypes & key_types,
DefaultOrFilter default_or_filter) const;
template
ColumnPtr RangeHashedDictionary<DictionaryKeyType::Complex>::getColumn(
const std::string & attribute_name,
const DataTypePtr & attribute_type,
const Columns & key_columns,
const DataTypes & key_types,
DefaultOrFilter default_or_filter) const;
}

View File

@ -15,6 +15,8 @@
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/IDictionarySource.h>
#include <Dictionaries/DictionaryHelpers.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
@ -29,11 +31,6 @@
#include <Functions/FunctionHelpers.h>
#include <Interpreters/castColumn.h>
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
namespace DB
{
@ -46,7 +43,6 @@ namespace ErrorCodes
extern const int TYPE_MISMATCH;
}
enum class RangeHashedDictionaryLookupStrategy : uint8_t
{
min,
@ -238,18 +234,21 @@ private:
static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute);
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
template <typename ValueType>
using ValueSetterFunc = std::function<void(size_t, const ValueType &, bool)>;
template <typename ValueType, bool is_nullable, typename DefaultValueExtractor>
void getItemsImpl(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
ValueSetterFunc<ValueType> && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, bool is_nullable, typename ValueSetter>
template <typename ValueType, bool is_nullable>
size_t getItemsShortCircuitImpl(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
ValueSetterFunc<ValueType> && set_value,
IColumn::Filter & default_mask) const;
ColumnPtr getColumnInternal(
@ -341,209 +340,6 @@ RangeHashedDictionary<dictionary_key_type>::RangeHashedDictionary(
calculateBytesAllocated();
}
/// Looks up `attribute_name` for every row of `key_columns` and returns the values as a column.
/// The last entry of `key_columns` / `key_types` is the range value; it is cast to the type of
/// `dict_struct.range_min` before the lookup. `default_or_filter` selects between default mode
/// (RefDefault, handled by getItemsImpl) and short-circuit mode (RefFilter, handled by
/// getItemsShortCircuitImpl). A null map, when present, turns the result into a ColumnNullable.
template <DictionaryKeyType dictionary_key_type>
ColumnPtr RangeHashedDictionary<dictionary_key_type>::getColumn(
const std::string & attribute_name,
const DataTypePtr & attribute_type,
const Columns & key_columns,
const DataTypes & key_types,
DefaultOrFilter default_or_filter) const
{
bool is_short_circuit = std::holds_alternative<RefFilter>(default_or_filter);
assert(is_short_circuit || std::holds_alternative<RefDefault>(default_or_filter));
/// For complex keys the trailing key type belongs to the range column, so it is dropped before validation.
if (dictionary_key_type == DictionaryKeyType::Complex)
{
auto key_types_copy = key_types;
key_types_copy.pop_back();
dict_struct.validateKeyTypes(key_types_copy);
}
ColumnPtr result;
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, attribute_type);
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
const auto & attribute = attributes[attribute_index];
/// Cast range column to storage type
Columns modified_key_columns = key_columns;
const ColumnPtr & range_storage_column = key_columns.back();
ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""};
modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type);
size_t keys_size = key_columns.front()->size();
/// The null map is only allocated when the attribute stores nullability information.
bool is_attribute_nullable = attribute.is_value_nullable.has_value();
ColumnUInt8::MutablePtr col_null_map_to;
ColumnUInt8::Container * vec_null_map_to = nullptr;
if (is_attribute_nullable)
{
col_null_map_to = ColumnUInt8::create(keys_size, false);
vec_null_map_to = &col_null_map_to->getData();
}
/// Called by callOnDictionaryAttributeType below; fills `result` for the attribute's concrete type.
auto type_call = [&](const auto & dictionary_attribute_type)
{
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
using AttributeType = typename Type::AttributeType;
using ValueType = DictionaryValueType<AttributeType>;
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size);
if (is_short_circuit)
{
/// NOTE(review): setters below index by `row`, but the column and null map are truncated to
/// `keys_found`; this assumes `row` is a compact found-only index - confirm against getItemsShortCircuitImpl.
IColumn::Filter & default_mask = std::get<RefFilter>(default_or_filter).get();
size_t keys_found = 0;
if constexpr (std::is_same_v<ValueType, Array>)
{
auto * out = column.get();
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, const Array & value, bool)
{
out->insert(value);
},
default_mask);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
keys_found = getItemsShortCircuitImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, StringRef value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_mask);
else
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, StringRef value, bool)
{
out->insertData(value.data, value.size);
},
default_mask);
}
else
{
auto & out = column->getData();
if (is_attribute_nullable)
keys_found = getItemsShortCircuitImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_mask);
else
keys_found = getItemsShortCircuitImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool)
{
out[row] = value;
},
default_mask);
out.resize(keys_found);
}
if (is_attribute_nullable)
vec_null_map_to->resize(keys_found);
}
else
{
/// Default mode: every row produces a value, taken from the extractor when the lookup misses.
const ColumnPtr & default_values_column = std::get<RefDefault>(default_or_filter).get();
DictionaryDefaultValueExtractor<AttributeType> default_value_extractor(
dictionary_attribute.null_value, default_values_column);
if constexpr (std::is_same_v<ValueType, Array>)
{
auto * out = column.get();
getItemsImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, const Array & value, bool)
{
out->insert(value);
},
default_value_extractor);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, StringRef value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t, StringRef value, bool)
{
out->insertData(value.data, value.size);
},
default_value_extractor);
}
else
{
auto & out = column->getData();
if (is_attribute_nullable)
getItemsImpl<ValueType, true>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool is_null)
{
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_value_extractor);
else
getItemsImpl<ValueType, false>(
attribute,
modified_key_columns,
[&](size_t row, const auto value, bool)
{
out[row] = value;
},
default_value_extractor);
}
}
result = std::move(column);
};
callOnDictionaryAttributeType(attribute.type, type_call);
if (is_attribute_nullable)
result = ColumnNullable::create(result, std::move(col_null_map_to));
return result;
}
template <DictionaryKeyType dictionary_key_type>
ColumnPtr RangeHashedDictionary<dictionary_key_type>::getColumnInternal(
const std::string & attribute_name,
@ -842,224 +638,6 @@ typename RangeHashedDictionary<dictionary_key_type>::Attribute RangeHashedDictio
return attribute;
}
/// Resolves one attribute value per key row and reports it through `set_value(row, value, is_null)`.
///
/// The last column of `key_columns` carries the range value; the preceding columns form the key.
/// For each row the setter is called exactly once: with the stored value when the key is present and
/// `interval_tree.find` reports at least one interval for the range value, otherwise with the value
/// supplied by `default_value_extractor`. Among several reported intervals the one comparing smallest
/// (`min` strategy) or largest (`max` strategy) is kept.
/// Throws TYPE_MISMATCH when the range column is not of the dictionary's range column type.
template <DictionaryKeyType dictionary_key_type>
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
    const Attribute & attribute,
    const Columns & key_columns,
    ValueSetter && set_value,
    DefaultValueExtractor & default_value_extractor) const
{
    const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);

    size_t keys_found = 0;

    const ColumnPtr & range_column = key_columns.back();
    auto key_columns_copy = key_columns;
    key_columns_copy.pop_back();

    DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
    DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
    const size_t keys_size = keys_extractor.getKeysSize();

    callOnRangeType(dict_struct.range_min->type, [&](const auto & types)
    {
        using Types = std::decay_t<decltype(types)>;
        using RangeColumnType = typename Types::LeftType;
        using RangeStorageType = typename RangeColumnType::ValueType;
        using RangeInterval = Interval<RangeStorageType>;

        const auto * range_column_typed = typeid_cast<const RangeColumnType *>(range_column.get());
        if (!range_column_typed)
            throw Exception(ErrorCodes::TYPE_MISMATCH,
                "Dictionary {} range column type should be equal to {}",
                getFullName(),
                dict_struct.range_min->type->getName());

        const auto & range_column_data = range_column_typed->getData();
        const auto & key_attribute_container = std::get<KeyAttributeContainerType<RangeStorageType>>(key_attribute.container);

        for (size_t key_index = 0; key_index < keys_size; ++key_index)
        {
            auto key = keys_extractor.extractCurrentKey();
            const auto it = key_attribute_container.find(key);

            if (it)
            {
                const auto date = range_column_data[key_index];
                const auto & interval_tree = it->getMapped();

                size_t value_index = 0;
                std::optional<RangeInterval> range;

                interval_tree.find(date, [&](auto & interval, auto & interval_value_index)
                {
                    if (range)
                    {
                        if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range)
                        {
                            range = interval;
                            value_index = interval_value_index;
                        }
                        else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range)
                        {
                            range = interval;
                            value_index = interval_value_index;
                        }
                    }
                    else
                    {
                        range = interval;
                        value_index = interval_value_index;
                    }

                    /// Keep iterating over the remaining intervals.
                    return true;
                });

                if (range.has_value())
                {
                    ++keys_found;

                    /// Bind by reference: copying here would deep-copy e.g. an Array value for every row.
                    const AttributeType & value = attribute_container[value_index];

                    if constexpr (is_nullable)
                    {
                        bool is_null = (*attribute.is_value_nullable)[value_index];
                        set_value(key_index, value, is_null);
                    }
                    else
                    {
                        set_value(key_index, value, false);
                    }

                    keys_extractor.rollbackCurrentKey();
                    continue;
                }
            }

            /// Key missing or no interval reported for the range value: fall back to the default.
            if constexpr (is_nullable)
                set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
            else
                set_value(key_index, default_value_extractor[key_index], false);

            keys_extractor.rollbackCurrentKey();
        }
    });

    query_count.fetch_add(keys_size, std::memory_order_relaxed);
    found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
/// Short-circuit lookup: reports only the values that were found and marks the misses in `default_mask`.
///
/// The last column of `key_columns` carries the range value; the preceding columns form the key.
/// `default_mask` is resized to the number of keys; entry i becomes 0 when row i was found and 1 otherwise.
/// For every found row `set_value(index, value, is_null)` is called, where `index` is the position of
/// that row among the found rows (0, 1, 2, ...), not the original row number. The caller builds a compact
/// column that it truncates to the returned count, so passing the original row number would misplace
/// values (and null flags) as soon as a miss precedes a hit.
/// Returns the number of found keys. Throws TYPE_MISMATCH when the range column has an unexpected type.
template <DictionaryKeyType dictionary_key_type>
template <typename AttributeType, bool is_nullable, typename ValueSetter>
size_t RangeHashedDictionary<dictionary_key_type>::getItemsShortCircuitImpl(
    const Attribute & attribute,
    const Columns & key_columns,
    ValueSetter && set_value,
    IColumn::Filter & default_mask) const
{
    const auto & attribute_container = std::get<AttributeContainerType<AttributeType>>(attribute.container);

    size_t keys_found = 0;

    const ColumnPtr & range_column = key_columns.back();
    auto key_columns_copy = key_columns;
    key_columns_copy.pop_back();

    DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
    DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
    const size_t keys_size = keys_extractor.getKeysSize();
    default_mask.resize(keys_size);

    callOnRangeType(dict_struct.range_min->type, [&](const auto & types)
    {
        using Types = std::decay_t<decltype(types)>;
        using RangeColumnType = typename Types::LeftType;
        using RangeStorageType = typename RangeColumnType::ValueType;
        using RangeInterval = Interval<RangeStorageType>;

        const auto * range_column_typed = typeid_cast<const RangeColumnType *>(range_column.get());
        if (!range_column_typed)
            throw Exception(ErrorCodes::TYPE_MISMATCH,
                "Dictionary {} range column type should be equal to {}",
                getFullName(),
                dict_struct.range_min->type->getName());

        const auto & range_column_data = range_column_typed->getData();
        const auto & key_attribute_container = std::get<KeyAttributeContainerType<RangeStorageType>>(key_attribute.container);

        for (size_t key_index = 0; key_index < keys_size; ++key_index)
        {
            auto key = keys_extractor.extractCurrentKey();
            const auto it = key_attribute_container.find(key);

            if (it)
            {
                const auto date = range_column_data[key_index];
                const auto & interval_tree = it->getMapped();

                size_t value_index = 0;
                std::optional<RangeInterval> range;

                interval_tree.find(date, [&](auto & interval, auto & interval_value_index)
                {
                    if (range)
                    {
                        if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range)
                        {
                            range = interval;
                            value_index = interval_value_index;
                        }
                        else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range)
                        {
                            range = interval;
                            value_index = interval_value_index;
                        }
                    }
                    else
                    {
                        range = interval;
                        value_index = interval_value_index;
                    }

                    /// Keep iterating over the remaining intervals.
                    return true;
                });

                if (range.has_value())
                {
                    default_mask[key_index] = 0;

                    /// Bind by reference: copying here would deep-copy e.g. an Array value for every row.
                    const AttributeType & value = attribute_container[value_index];

                    /// `keys_found` is the compact output position of this row (see the function comment).
                    if constexpr (is_nullable)
                    {
                        bool is_null = (*attribute.is_value_nullable)[value_index];
                        set_value(keys_found, value, is_null);
                    }
                    else
                    {
                        set_value(keys_found, value, false);
                    }

                    ++keys_found;
                    keys_extractor.rollbackCurrentKey();
                    continue;
                }
            }

            default_mask[key_index] = 1;
            keys_extractor.rollbackCurrentKey();
        }
    });

    query_count.fetch_add(keys_size, std::memory_order_relaxed);
    found_count.fetch_add(keys_found, std::memory_order_relaxed);
    return keys_found;
}
template <DictionaryKeyType dictionary_key_type>
template <typename AttributeType, bool is_nullable, typename ValueSetter>
void RangeHashedDictionary<dictionary_key_type>::getItemsInternalImpl(

View File

@ -0,0 +1,133 @@
#include <Dictionaries/RangeHashedDictionary.h>
/// Emits one explicit instantiation of RangeHashedDictionary<DictionaryKeyType>::getItemsImpl for the
/// given nullability flag, using DictionaryDefaultValueExtractor<AttributeType> as the default extractor.
#define INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType, IsNullable, AttributeType, ValueType) \
template void RangeHashedDictionary<DictionaryKeyType>::getItemsImpl<ValueType, IsNullable, DictionaryDefaultValueExtractor<AttributeType>>( \
const Attribute & attribute,\
const Columns & key_columns,\
typename RangeHashedDictionary<DictionaryKeyType>::ValueSetterFunc<ValueType> && set_value,\
DictionaryDefaultValueExtractor<AttributeType> & default_value_extractor) const;
/// Emits the four instantiations for one attribute type: Simple/Complex key, each nullable and non-nullable.
#define INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \
INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, true, AttributeType, DictionaryValueType<AttributeType>) \
INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, false, AttributeType, DictionaryValueType<AttributeType>) \
INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, true, AttributeType, DictionaryValueType<AttributeType>) \
INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, false, AttributeType, DictionaryValueType<AttributeType>)
namespace DB
{
/// Resolves one attribute value per key row and reports it through `set_value(row, value, is_null)`.
///
/// The last column of `key_columns` carries the range value; the preceding columns form the key.
/// For each row the setter is called exactly once: with the stored value when the key is present and
/// `interval_tree.find` reports at least one interval for the range value, otherwise with the value
/// supplied by `default_value_extractor`. Among several reported intervals the one comparing smallest
/// (`min` strategy) or largest (`max` strategy) is kept.
/// Throws TYPE_MISMATCH when the range column is not of the dictionary's range column type.
template <DictionaryKeyType dictionary_key_type>
template <typename ValueType, bool is_nullable, typename DefaultValueExtractor>
void RangeHashedDictionary<dictionary_key_type>::getItemsImpl(
    const Attribute & attribute,
    const Columns & key_columns,
    typename RangeHashedDictionary<dictionary_key_type>::ValueSetterFunc<ValueType> && set_value,
    DefaultValueExtractor & default_value_extractor) const
{
    const auto & attribute_container = std::get<AttributeContainerType<ValueType>>(attribute.container);

    size_t keys_found = 0;

    const ColumnPtr & range_column = key_columns.back();
    auto key_columns_copy = key_columns;
    key_columns_copy.pop_back();

    DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
    DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena());
    const size_t keys_size = keys_extractor.getKeysSize();

    callOnRangeType(
        dict_struct.range_min->type,
        [&](const auto & types)
        {
            using Types = std::decay_t<decltype(types)>;
            using RangeColumnType = typename Types::LeftType;
            using RangeStorageType = typename RangeColumnType::ValueType;
            using RangeInterval = Interval<RangeStorageType>;

            const auto * range_column_typed = typeid_cast<const RangeColumnType *>(range_column.get());
            if (!range_column_typed)
                throw Exception(
                    ErrorCodes::TYPE_MISMATCH,
                    "Dictionary {} range column type should be equal to {}",
                    getFullName(),
                    dict_struct.range_min->type->getName());

            const auto & range_column_data = range_column_typed->getData();
            const auto & key_attribute_container = std::get<KeyAttributeContainerType<RangeStorageType>>(key_attribute.container);

            for (size_t key_index = 0; key_index < keys_size; ++key_index)
            {
                auto key = keys_extractor.extractCurrentKey();
                const auto it = key_attribute_container.find(key);

                if (it)
                {
                    const auto date = range_column_data[key_index];
                    const auto & interval_tree = it->getMapped();

                    size_t value_index = 0;
                    std::optional<RangeInterval> range;

                    interval_tree.find(
                        date,
                        [&](auto & interval, auto & interval_value_index)
                        {
                            if (range)
                            {
                                if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range)
                                {
                                    range = interval;
                                    value_index = interval_value_index;
                                }
                                else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > *range)
                                {
                                    range = interval;
                                    value_index = interval_value_index;
                                }
                            }
                            else
                            {
                                range = interval;
                                value_index = interval_value_index;
                            }

                            /// Keep iterating over the remaining intervals.
                            return true;
                        });

                    if (range.has_value())
                    {
                        ++keys_found;

                        /// Bind by reference: the setter takes `const ValueType &`, so copying here would
                        /// only add a deep copy per row (expensive for Array values).
                        const ValueType & value = attribute_container[value_index];

                        if constexpr (is_nullable)
                        {
                            bool is_null = (*attribute.is_value_nullable)[value_index];
                            set_value(key_index, value, is_null);
                        }
                        else
                        {
                            set_value(key_index, value, false);
                        }

                        keys_extractor.rollbackCurrentKey();
                        continue;
                    }
                }

                /// Key missing or no interval reported for the range value: fall back to the default.
                if constexpr (is_nullable)
                    set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index));
                else
                    set_value(key_index, default_value_extractor[key_index], false);

                keys_extractor.rollbackCurrentKey();
            }
        });

    query_count.fetch_add(keys_size, std::memory_order_relaxed);
    found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
}

View File

@ -0,0 +1,10 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsImpl.txx>
/// Explicit instantiations of RangeHashedDictionary::getItemsImpl for the Decimal* and DateTime64
/// attribute types, via the macro defined in the included .txx file.
namespace DB
{
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal32);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal64);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal128);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal256);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(DateTime64);
}

View File

@ -0,0 +1,7 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsImpl.txx>
/// Explicit instantiations of RangeHashedDictionary::getItemsImpl for the floating-point
/// attribute types, via the macro defined in the included .txx file.
namespace DB
{
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Float32);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Float64);
}

View File

@ -0,0 +1,11 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsImpl.txx>
/// Explicit instantiations of RangeHashedDictionary::getItemsImpl for the signed integer
/// attribute types, via the macro defined in the included .txx file.
namespace DB
{
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int8);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int16);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int32);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int64);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int128);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int256);
}

View File

@ -0,0 +1,10 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsImpl.txx>
/// Explicit instantiations of RangeHashedDictionary::getItemsImpl for the UUID, IPv4, IPv6, String
/// and Array attribute types, via the macro defined in the included .txx file.
namespace DB
{
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UUID);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(IPv4);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(IPv6);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(String);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Array);
}

View File

@ -0,0 +1,11 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsImpl.txx>
/// Explicit instantiations of RangeHashedDictionary::getItemsImpl for the unsigned integer
/// attribute types, via the macro defined in the included .txx file.
namespace DB
{
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt8);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt16);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt32);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt64);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt128);
INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt256);
}

View File

@ -0,0 +1,132 @@
#include <Dictionaries/RangeHashedDictionary.h>
/// Emits one explicit instantiation of RangeHashedDictionary<DictionaryKeyType>::getItemsShortCircuitImpl
/// for the given value type and nullability flag.
#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType, IsNullable, ValueType) \
template size_t RangeHashedDictionary<DictionaryKeyType>::getItemsShortCircuitImpl<ValueType, IsNullable>( \
const Attribute & attribute, \
const Columns & key_columns, \
typename RangeHashedDictionary<DictionaryKeyType>::ValueSetterFunc<ValueType> && set_value, \
IColumn::Filter & default_mask) const;
/// Emits the four instantiations for one attribute type: Simple/Complex key, each nullable and non-nullable.
#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, true, DictionaryValueType<AttributeType>) \
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, false, DictionaryValueType<AttributeType>) \
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, true, DictionaryValueType<AttributeType>) \
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, false, DictionaryValueType<AttributeType>)
namespace DB
{
/// Short-circuit lookup of a single attribute for a batch of rows.
///
/// The last column of `key_columns` is the range column; the columns before it form the dictionary key.
/// For every row where an interval is selected, `set_value(row, value, is_null)` is called and
/// `default_mask[row]` is set to 0. For every other row `default_mask[row]` is set to 1 and
/// `set_value` is not called. `default_mask` is resized to the number of keys.
///
/// Returns the number of rows for which a value was found; the same number is added to `found_count`,
/// and the number of processed keys is added to `query_count`.
/// Throws TYPE_MISMATCH when the range column cannot be cast to the column type selected
/// for `dict_struct.range_min->type`.
template <DictionaryKeyType dictionary_key_type>
template <typename ValueType, bool is_nullable>
size_t RangeHashedDictionary<dictionary_key_type>::getItemsShortCircuitImpl(
    const Attribute & attribute,
    const Columns & key_columns,
    typename RangeHashedDictionary<dictionary_key_type>::ValueSetterFunc<ValueType> && set_value,
    IColumn::Filter & default_mask) const
{
    const auto & values = std::get<AttributeContainerType<ValueType>>(attribute.container);

    /// Split the input: the trailing column carries the range point, the rest is the key.
    const ColumnPtr & range_column = key_columns.back();
    auto only_key_columns = key_columns;
    only_key_columns.pop_back();

    DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
    DictionaryKeysExtractor<dictionary_key_type> keys_extractor(only_key_columns, arena_holder.getComplexKeyArena());

    const size_t rows = keys_extractor.getKeysSize();
    default_mask.resize(rows);

    size_t found_rows = 0;

    callOnRangeType(
        dict_struct.range_min->type,
        [&](const auto & types)
        {
            using TypePair = std::decay_t<decltype(types)>;
            using TypedRangeColumn = typename TypePair::LeftType;
            using RangeValue = typename TypedRangeColumn::ValueType;
            using RangeInterval = Interval<RangeValue>;

            const auto * typed_range_column = typeid_cast<const TypedRangeColumn *>(range_column.get());
            if (typed_range_column == nullptr)
                throw Exception(
                    ErrorCodes::TYPE_MISMATCH,
                    "Dictionary {} range column type should be equal to {}",
                    getFullName(),
                    dict_struct.range_min->type->getName());

            const auto & range_points = typed_range_column->getData();
            const auto & intervals_by_key = std::get<KeyAttributeContainerType<RangeValue>>(key_attribute.container);

            for (size_t row = 0; row < rows; ++row)
            {
                auto key = keys_extractor.extractCurrentKey();
                bool row_found = false;

                if (const auto it = intervals_by_key.find(key); it)
                {
                    const auto point = range_points[row];
                    const auto & interval_tree = it->getMapped();

                    size_t chosen_index = 0;
                    std::optional<RangeInterval> chosen;

                    /// The callback sees every interval reported by the tree for `point` and keeps
                    /// the first one, then the smallest (strategy `min`) or largest (strategy `max`).
                    interval_tree.find(
                        point,
                        [&](auto & interval, auto & interval_value_index)
                        {
                            const bool take_interval = !chosen
                                || (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min)
                                    && interval < *chosen)
                                || (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max
                                    && interval > *chosen);

                            if (take_interval)
                            {
                                chosen = interval;
                                chosen_index = interval_value_index;
                            }

                            return true;
                        });

                    if (chosen.has_value())
                    {
                        default_mask[row] = 0;
                        ++found_rows;

                        ValueType value = values[chosen_index];

                        /// The null map is consulted only for nullable attributes.
                        bool is_null = false;
                        if constexpr (is_nullable)
                            is_null = (*attribute.is_value_nullable)[chosen_index];

                        set_value(row, value, is_null);
                        row_found = true;
                    }
                }

                if (!row_found)
                    default_mask[row] = 1;

                keys_extractor.rollbackCurrentKey();
            }
        });

    query_count.fetch_add(rows, std::memory_order_relaxed);
    found_count.fetch_add(found_rows, std::memory_order_relaxed);

    return found_rows;
}
}

View File

@ -0,0 +1,10 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx>
/// Explicit instantiations of getItemsShortCircuitImpl for the Decimal and DateTime64 attribute types.
/// The macro is defined in the included RangeHashedDictionaryGetItemsShortCircuitImpl.txx and expands
/// to the Simple/Complex key x nullable/non-nullable variants for each listed type.
namespace DB
{
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal32);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal64);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal128);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal256);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(DateTime64);
}

View File

@ -0,0 +1,7 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx>
/// Explicit instantiations of getItemsShortCircuitImpl for the floating-point attribute types.
/// The macro is defined in the included RangeHashedDictionaryGetItemsShortCircuitImpl.txx and expands
/// to the Simple/Complex key x nullable/non-nullable variants for each listed type.
namespace DB
{
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Float32);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Float64);
}

View File

@ -0,0 +1,11 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx>
/// Explicit instantiations of getItemsShortCircuitImpl for the signed integer attribute types.
/// The macro is defined in the included RangeHashedDictionaryGetItemsShortCircuitImpl.txx and expands
/// to the Simple/Complex key x nullable/non-nullable variants for each listed type.
namespace DB
{
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int8);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int16);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int32);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int64);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int128);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int256);
}

View File

@ -0,0 +1,10 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx>
/// Explicit instantiations of getItemsShortCircuitImpl for the UUID, IPv4, IPv6, String and Array
/// attribute types.
/// The macro is defined in the included RangeHashedDictionaryGetItemsShortCircuitImpl.txx and expands
/// to the Simple/Complex key x nullable/non-nullable variants for each listed type.
namespace DB
{
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UUID);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(IPv4);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(IPv6);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(String);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Array);
}

View File

@ -0,0 +1,11 @@
#include <Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx>
/// Explicit instantiations of getItemsShortCircuitImpl for the unsigned integer attribute types.
/// The macro is defined in the included RangeHashedDictionaryGetItemsShortCircuitImpl.txx and expands
/// to the Simple/Complex key x nullable/non-nullable variants for each listed type.
namespace DB
{
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt8);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt16);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt32);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt64);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt128);
INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt256);
}

View File

@ -27,6 +27,7 @@
#include <Dictionaries/DictionaryHelpers.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryPipelineExecutor.h>
#include <Dictionaries/RegExpTreeDictionary.h>
#include <Dictionaries/YAMLRegExpTreeDictionarySource.h>

View File

@ -1,5 +1,8 @@
#include "RangeHashedDictionary.h"
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/DictionarySource.h>
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionarySourceHelpers.h>
#include <Dictionaries/DictionaryFactory.h>
namespace DB

View File

@ -1832,10 +1832,10 @@ using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl<NameFro
REGISTER_FUNCTION(FormatDateTime)
{
factory.registerFunction<FunctionFormatDateTime>();
factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name);
factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionFromUnixTimestamp>();
factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name);
factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionFormatDateTimeInJodaSyntax>();
factory.registerFunction<FunctionFromUnixTimestampInJodaSyntax>();

View File

@ -1942,7 +1942,7 @@ namespace
REGISTER_FUNCTION(ParseDateTime)
{
factory.registerFunction<FunctionParseDateTime>();
factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name);
factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionParseDateTimeOrZero>();
factory.registerFunction<FunctionParseDateTimeOrNull>();
factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name, FunctionFactory::CaseInsensitive);

View File

@ -62,8 +62,8 @@ namespace DB
struct CachePriorityGuard : private boost::noncopyable
{
using Mutex = std::timed_mutex;
/// struct is used (not keyword `using`) to make CacheGuard::Lock non-interchangable with other guards locks
/// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example
/// struct is used (not keyword `using`) to make CachePriorityGuard::Lock non-interchangeable with other guards' locks
/// so, we wouldn't be able to pass CachePriorityGuard::Lock to a function which accepts KeyGuard::Lock, for example
struct Lock : public std::unique_lock<Mutex>
{
using Base = std::unique_lock<Mutex>;

View File

@ -86,6 +86,7 @@ void replaceStorageInQueryTree(QueryTreeNodePtr & query_tree, const ContextPtr &
continue;
auto replacement_table_expression = std::make_shared<TableNode>(storage, context);
replacement_table_expression->setAlias(node->getAlias());
if (auto table_expression_modifiers = table_node.getTableExpressionModifiers())
replacement_table_expression->setTableExpressionModifiers(*table_expression_modifiers);

View File

@ -885,6 +885,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo
if (modified_query_info.table_expression)
{
auto replacement_table_expression = std::make_shared<TableNode>(storage, storage_lock, storage_snapshot_);
replacement_table_expression->setAlias(modified_query_info.table_expression->getAlias());
if (query_info.table_expression_modifiers)
replacement_table_expression->setTableExpressionModifiers(*query_info.table_expression_modifiers);
@ -1025,7 +1026,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
const auto & [database_name, storage, _, table_name] = storage_with_lock;
bool allow_experimental_analyzer = context->getSettingsRef().allow_experimental_analyzer;
auto storage_stage
= storage->getQueryProcessingStage(context, QueryProcessingStage::Complete, storage_snapshot_, modified_query_info);
= storage->getQueryProcessingStage(context, processed_stage, storage_snapshot_, modified_query_info);
builder = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
@ -1052,40 +1053,80 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
Block pipe_header = builder->getHeader();
if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database"))
if (allow_experimental_analyzer)
{
ColumnWithTypeAndName column;
column.name = "_database";
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(database_name));
String table_alias = modified_query_info.query_tree->as<QueryNode>()->getJoinTree()->as<TableNode>()->getAlias();
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column_actions = std::make_shared<ExpressionActions>(
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes));
String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database";
String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_table" : table_alias + "._table";
builder->addSimpleTransform([&](const Block & stream_header)
{ return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions); });
if (has_database_virtual_column && common_header.has(database_column)
&& (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + database_name + "'_String")))
{
ColumnWithTypeAndName column;
column.name = database_column;
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(database_name));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column_actions = std::make_shared<ExpressionActions>(
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes));
builder->addSimpleTransform([&](const Block & stream_header)
{ return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions); });
}
if (has_table_virtual_column && common_header.has(table_column)
&& (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + table_name + "'_String")))
{
ColumnWithTypeAndName column;
column.name = table_column;
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(table_name));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column_actions = std::make_shared<ExpressionActions>(
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes));
builder->addSimpleTransform([&](const Block & stream_header)
{ return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions); });
}
}
if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table"))
else
{
ColumnWithTypeAndName column;
column.name = "_table";
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(table_name));
if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database"))
{
ColumnWithTypeAndName column;
column.name = "_database";
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(database_name));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column_actions = std::make_shared<ExpressionActions>(
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column_actions = std::make_shared<ExpressionActions>(
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes));
builder->addSimpleTransform([&](const Block & stream_header)
{ return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions); });
}
builder->addSimpleTransform([&](const Block & stream_header)
{ return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions); });
if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table"))
{
ColumnWithTypeAndName column;
column.name = "_table";
column.type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
column.column = column.type->createColumnConst(0, Field(table_name));
auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
auto adding_column_actions = std::make_shared<ExpressionActions>(
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes));
builder->addSimpleTransform([&](const Block & stream_header)
{ return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions); });
}
}
/// Subordinate tables could have different but convertible types, like numeric types of different width.
/// We must return streams with a structure equal to the structure of the Merge table.
convertAndFilterSourceStream(
header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, processed_stage);
header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, storage_stage);
}
return builder;
@ -1116,13 +1157,13 @@ QueryPlan ReadFromMerge::createPlanForTable(
bool allow_experimental_analyzer = modified_context->getSettingsRef().allow_experimental_analyzer;
auto storage_stage = storage->getQueryProcessingStage(modified_context,
QueryProcessingStage::Complete,
processed_stage,
storage_snapshot_,
modified_query_info);
QueryPlan plan;
if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns))
if (processed_stage <= storage_stage)
{
/// If there are only virtual columns in query, you must request at least one other column.
if (real_column_names.empty())
@ -1167,7 +1208,7 @@ QueryPlan ReadFromMerge::createPlanForTable(
row_policy_data_opt->addStorageFilter(source_step_with_filter);
}
}
else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns))
else if (processed_stage > storage_stage || allow_experimental_analyzer)
{
/// Maximum permissible parallelism is streams_num
modified_context->setSetting("max_threads", streams_num);

View File

@ -1,5 +1,4 @@
00223_shard_distributed_aggregation_memory_efficient
00717_merge_and_distributed
00725_memory_tracking
01062_pm_all_join_with_block_continuation
01083_expressions_in_engine_arguments

View File

@ -0,0 +1,77 @@
123
2009-02-14 00:31:30
[1,2,3]
123 Nullable(UInt8)
\N Nullable(UInt8)
1
255
['Hello','wo\'rld\\']
Hello wo\\\'rld\\\\
wo\'rld\\ wo\\\'rld\\\\
133 210
210
[123,456]
1 -1
[] [] Array(Nothing) Array(Array(Array(Tuple(UInt64, String))))
1970-01-01 01:00:00
2009-02-14 00:31:30.123456
1970-01-01 00:59:59.888889
2009-02-14 00:31:30
1970-01-01 01:00:00
2299-12-31 23:59:59.000000
2009-02-14
2009-02-14
123\0\0
123
123
123
123
123
123
123
123
123
123
String 123
123 UInt8
200 UInt8
123
123
1.1
1.10000000000000016387
18446744073709551615
[1.1,2.3]
[1.10000000000000016387,2.29999999999999967236]
Row 1:
──────
CAST('1.1', 'Decimal(30, 20)'): 1.1
CAST('1.1', 'Decimal(30, 20)'): 1.1
CAST(plus(1, 1), 'UInt8'): 2
-1
\N
0
255
123
Hello\0\0\0\0\0
Hello\0\0\0\0\0
123.45
2024-04-25 01:02:03
2024-04-25 01:02:03.000000
2024-04-25 01:02:03
2024-04-25 01:02:03.456789
2024-03-16 11:01:25
2024-03-16 19:01:25
2024-03-16 19:01:25
2024-03-16 11:01:25
123 \N \N \N
123 0 0 0
Nullable(UInt8) UInt8
123
123
123
\N
2024-04-25 2024-01-01 02:03:04 1 12
2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30
2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30
1986-04-25 13:00:00
14

View File

@ -0,0 +1,346 @@
SET session_timezone = 'Europe/Amsterdam';
-- Type conversion functions and operators.
-- 1. SQL standard CAST operator: `CAST(value AS Type)`.
SELECT CAST(123 AS String);
-- It converts between various data types, including parameterized data types
SELECT CAST(1234567890 AS DateTime('Europe/Amsterdam'));
-- and composite data types:
SELECT CAST('[1, 2, 3]' AS Array(UInt8));
-- Its return type depends on the setting `cast_keep_nullable`. If it is enabled, if the source argument type is Nullable, the resulting data type will be also Nullable, even if it is not written explicitly:
SET cast_keep_nullable = 1;
SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('NULL'));
SET cast_keep_nullable = 0;
SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('NULL')); -- { serverError CANNOT_PARSE_TEXT }
-- There are various type conversion rules, some worth noting.
-- Conversion between numeric types can involve implementation-defined overflow:
SELECT CAST(257 AS UInt8);
SELECT CAST(-1 AS UInt8);
-- Conversion from string acts like parsing, and for composite data types like Array, Tuple, it works in the same way as from the `Values` data format:
SELECT CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String));
-- '
-- While for simple data types, it does not interpret escape sequences:
SELECT arrayJoin(CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String))) AS x, CAST($$wo\'rld\\$$ AS FixedString(9)) AS y;
-- As conversion from String is similar to direct parsing rather than conversion from other types,
-- it can be stricter for numbers by not tolerating overflows in some cases:
SELECT CAST(-123 AS UInt8), CAST(1234 AS UInt8);
SELECT CAST('-123' AS UInt8); -- { serverError CANNOT_PARSE_NUMBER }
-- In some cases it still allows overflows, but it is implementation defined:
SELECT CAST('1234' AS UInt8);
-- Parsing from a string does not tolerate extra whitespace characters:
SELECT CAST(' 123' AS UInt8); -- { serverError CANNOT_PARSE_TEXT }
SELECT CAST('123 ' AS UInt8); -- { serverError CANNOT_PARSE_TEXT }
-- But for composite data types, it involves a more featured parser, that takes care of whitespace inside the data structures:
SELECT CAST('[ 123 ,456, ]' AS Array(UInt16));
-- Conversion from a floating point value to an integer will involve truncation towards zero:
SELECT CAST(1.9, 'Int64'), CAST(-1.9, 'Int64');
-- Conversion from NULL into a non-Nullable type will throw an exception, as well as conversions from non-finite floating point numbers (NaN, inf, -inf) to an integer, or conversion between arrays of different dimensions.
-- However, you might find it amusing that an empty array of Nothing data type can be converted to arrays of any dimensions:
SELECT [] AS x, CAST(x AS Array(Array(Array(Tuple(UInt64, String))))) AS y, toTypeName(x), toTypeName(y);
-- Conversion between numbers and DateTime/Date data types interprets the number as the number of seconds/days from the Unix epoch,
-- where Unix epoch starts from 1970-01-01T00:00:00Z (the midnight of Gregorian year 1970 in UTC),
-- and the number of seconds doesn't count leap seconds, as in Unix.
-- For example, it is 1 AM in Amsterdam:
SELECT CAST(0 AS DateTime('Europe/Amsterdam'));
-- The numbers can be fractional and negative (for DateTime64):
SELECT CAST(1234567890.123456 AS DateTime64(6, 'Europe/Amsterdam'));
SELECT CAST(-0.111111 AS DateTime64(6, 'Europe/Amsterdam'));
-- If the result does not fit in the range of the corresponding time data types, it is truncated and saturated to the boundaries:
SELECT CAST(1234567890.123456 AS DateTime('Europe/Amsterdam'));
SELECT CAST(-1 AS DateTime('Europe/Amsterdam'));
SELECT CAST(1e20 AS DateTime64(6, 'Europe/Amsterdam'));
-- A special case is DateTime64(9) - the maximum resolution, where it does not cover the usual range,
-- and in this case, it throws an exception on overflow (I don't mind if we change this behavior in the future):
SELECT CAST(1e20 AS DateTime64(9, 'Europe/Amsterdam')); -- { serverError DECIMAL_OVERFLOW }
-- If a number is converted to a Date data type, the value is interpreted as the number of days since the Unix epoch,
-- but if the number is larger than the range of the data type, it is interpreted as a unix timestamp
-- (the number of seconds since the Unix epoch), similarly to how it is done for the DateTime data type,
-- for convenience (while the internal representation of Date is the number of days,
-- often people want the unix timestamp to be also parsed into the Date data type):
SELECT CAST(14289 AS Date);
SELECT CAST(1234567890 AS Date);
-- When converting to a FixedString, if the length of the result data type is larger than the value, the result is padded with zero bytes:
SELECT CAST('123' AS FixedString(5)) FORMAT TSV;
-- But if it does not fit, an exception is thrown:
SELECT CAST('12345' AS FixedString(3)) FORMAT TSV; -- { serverError TOO_LARGE_STRING_SIZE }
-- The operator is case-insensitive:
SELECT CAST(123 AS String);
SELECT cast(123 AS String);
SELECT Cast(123 AS String);
-- 2. The functional form of this operator: `CAST(value, 'Type')`:
SELECT CAST(123, 'String');
-- This form is equivalent. Keep in mind that the type has to be a constant expression:
SELECT CAST(123, 'Str'||'ing'); -- this works.
-- This does not work: SELECT materialize('String') AS type, CAST(123, type);
-- It is also case-insensitive:
SELECT CasT(123, 'String');
-- The functional form exists for the consistency of implementation (as every operator also exists in the functional form and the functional form is represented in the query's Abstract Syntax Tree). Anyway, the functional form also makes sense for users, when they need to construct a data type name from a constant expression, or when they want to generate a query programmatically.
-- It's worth noting that the operator form does not allow specifying the type name as a string literal:
-- This does not work: SELECT CAST(123 AS 'String');
-- By only allowing it as an identifier, either bare word:
SELECT CAST(123 AS String);
-- Or as MySQL or PostgreSQL quoted identifiers:
SELECT CAST(123 AS `String`);
SELECT CAST(123 AS "String");
-- While the functional form only allows the type name as a string literal:
SELECT CAST(123, 'String'); -- works
SELECT CAST(123, String); -- { serverError UNKNOWN_IDENTIFIER }
-- However, you can cheat:
SELECT 'String' AS String, CAST(123, String);
-- 3. The internal function `_CAST` which is different from `CAST` only by being not dependent on the value of `cast_keep_nullable` setting and other settings.
-- This is needed when ClickHouse has to persist an expression for future use, like in table definitions, including primary and partition keys and other indices.
-- The function is not intended to be used directly. When a user uses a regular `CAST` operator or function in a table definition, it is transparently converted to `_CAST` to persist its behavior. However, the user can still use the internal version directly:
SELECT _CAST(x, 'UInt8') AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('456'));
-- There is no operator form of this function:
-- does not work, here UInt8 is interpreted as an alias for the value:
SELECT _CAST(123 AS UInt8); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT CAST(123 AS UInt8); -- works
-- 4. PostgreSQL-style cast syntax `::`
SELECT 123::String;
-- It has a difference from the `CAST` operator: if it is applied to a simple literal value, instead of performing a type conversion, it invokes the SQL parser directly on the corresponding text fragment of the query. The most important case will be the floating-point and decimal types.
-- In this example, we parse `1.1` as Decimal and do not involve any type conversion:
SELECT 1.1::Decimal(30, 20);
-- In this example, `1.1` is first parsed as usual, yielding a Float64 value, and then converted to Decimal, producing a wrong result:
SELECT CAST(1.1 AS Decimal(30, 20));
-- We can change this behavior in the future.
-- Another example:
SELECT -1::UInt64; -- { serverError CANNOT_PARSE_NUMBER }
SELECT CAST(-1 AS UInt64); -- conversion with overflow
-- For composite data types, if a value is a literal, it is parsed directly:
SELECT [1.1, 2.3]::Array(Decimal(30, 20));
-- But if the value contains expressions, the usage of the `::` operator will be equivalent to invoking the CAST operator on the expression:
SELECT [1.1, 2.3 + 0]::Array(Decimal(30, 20));
-- The automatic column name for the result of an application of the `::` operator may be the same as for the result of an application of the CAST operator to a string containing the corresponding fragment of the query or to a corresponding expression:
SELECT 1.1::Decimal(30, 20), CAST('1.1' AS Decimal(30, 20)), (1+1)::UInt8 FORMAT Vertical;
-- The operator has the highest priority among others:
SELECT 1-1::String; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- But one interesting example is the unary minus. Here the minus is not an operator but part of the numeric literal:
SELECT -1::String;
-- Here it is an operator:
SELECT 1 AS x, -x::String; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- 5. Accurate casting functions: `accurateCast`, `accurateCastOrNull`, `accurateCastOrDefault`.
-- These functions check if the value is exactly representable in the target data type.
-- The function `accurateCast` performs the conversion or throws an exception if the value is not exactly representable:
SELECT accurateCast(1.123456789, 'Float32'); -- { serverError CANNOT_CONVERT_TYPE }
-- The function `accurateCastOrNull` always wraps the target type into Nullable, and returns NULL if the value is not exactly representable:
SELECT accurateCastOrNull(1.123456789, 'Float32');
-- The function `accurateCastOrDefault` takes an additional parameter, which must be of the target type, and returns it if the value is not exactly representable:
SELECT accurateCastOrDefault(-1, 'UInt64', 0::UInt64);
-- These functions are case-sensitive, and there are no corresponding operators:
SELECT ACCURATECAST(1, 'String'); -- { serverError UNKNOWN_FUNCTION }.
-- 6. Explicit conversion functions:
-- `toString`, `toFixedString`,
-- `toUInt8`, `toUInt16`, `toUInt32`, `toUInt64`, `toUInt128`, `toUInt256`,
-- `toInt8`, `toInt16`, `toInt32`, `toInt64`, `toInt128`, `toInt256`,
-- `toFloat32`, `toFloat64`,
-- `toDecimal32`, `toDecimal64`, `toDecimal128`, `toDecimal256`,
-- `toDate`, `toDate32`, `toDateTime`, `toDateTime64`,
-- `toUUID`, `toIPv4`, `toIPv6`,
-- `toIntervalNanosecond`, `toIntervalMicrosecond`, `toIntervalMillisecond`,
-- `toIntervalSecond`, `toIntervalMinute`, `toIntervalHour`,
-- `toIntervalDay`, `toIntervalWeek`, `toIntervalMonth`, `toIntervalQuarter`, `toIntervalYear`
-- These functions work under the same rules as the CAST operator and can be thought of as elementary implementation parts of that operator. They allow implementation-defined overflow while converting between numeric types.
SELECT toUInt8(-1);
-- These are ClickHouse-native conversion functions. They take an argument with the input value, and for some of the data types (`FixedString`, `DateTime`, `DateTime64`, `Decimal`s), the subsequent arguments are constant expressions, defining the parameters of these data types, or the rules to interpret the source value.
SELECT toFloat64(123); -- no arguments
SELECT toFixedString('Hello', 10) FORMAT TSV; -- the parameter of the FixedString data type, the function returns FixedString(10)
SELECT toFixedString('Hello', 5 + 5) FORMAT TSV; -- it can be a constant expression
SELECT toDecimal32('123.456', 2); -- the scale of the Decimal data type
SELECT toDateTime('2024-04-25 01:02:03', 'Europe/Amsterdam'); -- the time zone of DateTime
SELECT toDateTime64('2024-04-25 01:02:03', 6, 'Europe/Amsterdam'); -- the scale of DateTime64 and its time zone
-- The length of FixedString and the scale of Decimal and DateTime64 types are mandatory arguments, while the time zone of the DateTime data type is optional.
-- If the time zone is not specified, the time zone of the argument's data type is used, and if the argument is not a date time, the session time zone is used.
SELECT toDateTime('2024-04-25 01:02:03');
SELECT toDateTime64('2024-04-25 01:02:03.456789', 6);
-- Here, the time zone can be specified as the rule of interpretation of the value during conversion:
SELECT toString(1710612085::DateTime, 'America/Los_Angeles');
SELECT toString(1710612085::DateTime);
-- In the case when the time zone is not the part of the resulting data type, but a rule of interpretation of the source value,
-- it can be non-constant. Let's clarify: in this example, the resulting data type is a String; it does not have a time zone parameter:
SELECT toString(1710612085::DateTime, tz) FROM Values('tz String', 'Europe/Amsterdam', 'America/Los_Angeles');
-- Functions converting to numeric types, date and datetime, IP and UUID, also have versions with -OrNull and -OrZero fallbacks,
-- that don't throw exceptions on parsing errors.
-- They use the same rules as the accurateCast function:
SELECT toUInt8OrNull('123'), toUInt8OrNull('-123'), toUInt8OrNull('1234'), toUInt8OrNull(' 123');
SELECT toUInt8OrZero('123'), toUInt8OrZero('-123'), toUInt8OrZero('1234'), toUInt8OrZero(' 123');
SELECT toTypeName(toUInt8OrNull('123')), toTypeName(toUInt8OrZero('123'));
-- These functions are only applicable to string data types.
-- Although there is room for extension:
SELECT toUInt8OrNull(123); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- String and FixedString work:
SELECT toUInt8OrNull(123::FixedString(3));
-- For the FixedString data type trailing zero bytes are allowed, because they are the padding for FixedString:
SELECT toUInt8OrNull('123'::FixedString(4));
SELECT toUInt8OrNull('123\0'::FixedString(4));
-- While for String, they don't:
SELECT toUInt8OrNull('123\0');
-- 7. SQL-compatibility type-defining operators:
SELECT DATE '2024-04-25', TIMESTAMP '2024-01-01 02:03:04', INTERVAL 1 MINUTE, INTERVAL '12 hour';
-- These operators are interpreted as the corresponding explicit conversion functions.
-- 8. SQL-compatibility aliases for explicit conversion functions:
SELECT DATE('2024-04-25'), TIMESTAMP('2024-01-01 02:03:04'), FROM_UNIXTIME(1234567890);
-- These functions exist for compatibility with MySQL. They are case-insensitive.
SELECT date '2024-04-25', timeSTAMP('2024-01-01 02:03:04'), From_Unixtime(1234567890);
-- 9. Specialized conversion functions:
-- `parseDateTimeBestEffort`, `parseDateTimeBestEffortUS`, `parseDateTime64BestEffort`, `parseDateTime64BestEffortUS`, `toUnixTimestamp`
-- These functions are similar to explicit conversion functions but provide special rules on how the conversion is performed.
SELECT parseDateTimeBestEffort('25 Apr 1986 1pm');
-- 10. Functions for converting between different components or rounding of date and time data types.
SELECT toDayOfMonth(toDateTime(1234567890));
-- These functions are covered in a separate topic.

View File

@ -7,8 +7,6 @@ TU_EXCLUDES=(
AggregateFunctionUniq
FunctionsConversion
RangeHashedDictionary
Aggregator
)