mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
RangeHashed dictionary support has function
This commit is contained in:
parent
79b061ac3c
commit
854bfaf366
@ -52,7 +52,6 @@ namespace ErrorCodes
|
||||
extern const int DICTIONARY_IS_EMPTY;
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
|
||||
@ -178,10 +177,76 @@ ColumnPtr RangeHashedDictionary::getColumn(
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns a UInt8 column with one value per input row, produced by hasKeysImpl for the
/// (key, range value) pairs taken from key_columns[0] and key_columns[1].
///
/// key_columns[1] is converted to a full column if it is constant and then cast to Int64
/// with castColumnAccurate before the lookup. key_types[1] is the original type of that
/// column. The number of processed rows is added to query_count.
ColumnUInt8::Ptr RangeHashedDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
    auto range_storage_column = key_columns[1];
    ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types[1], ""};

    /// Range values are compared as Int64 (the type the cast below targets).
    auto range_column_storage_type = std::make_shared<DataTypeInt64>();
    auto range_column_updated = castColumnAccurate(column_to_cast, range_column_storage_type);

    /// NOTE(review): the backup storages are presumably filled by getColumnVectorData when the
    /// column data cannot be referenced directly - confirm against its definition.
    PaddedPODArray<Key> key_backup_storage;
    PaddedPODArray<RangeStorageType> range_backup_storage;

    const PaddedPODArray<Key> & ids = getColumnVectorData(this, key_columns[0], key_backup_storage);
    const PaddedPODArray<RangeStorageType> & dates = getColumnVectorData(this, range_column_updated, range_backup_storage);

    /// Presence is checked against the map of the first attribute only.
    /// NOTE(review): this assumes every attribute holds the same keys and ranges - confirm.
    const auto & attribute = attributes.front();

    ColumnUInt8::Ptr result;

    /// Dispatch on the attribute's value type so that hasKeysImpl reads the matching map.
    auto type_call = [&](const auto & dictionary_attribute_type)
    {
        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
        using AttributeType = typename Type::AttributeType;
        using ValueType = DictionaryValueType<AttributeType>;
        result = hasKeysImpl<ValueType>(attribute, ids, dates);
    };

    callOnDictionaryAttributeType(attribute.type, type_call);

    query_count.fetch_add(ids.size(), std::memory_order_relaxed);

    return result;
}
|
||||
|
||||
template <typename AttributeType>
|
||||
ColumnUInt8::Ptr RangeHashedDictionary::hasKeysImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates) const
|
||||
{
|
||||
auto result = ColumnUInt8::create(ids.size());
|
||||
auto& out = result->getData();
|
||||
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto it = attr.find(ids[row]);
|
||||
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[row];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it = std::find_if(
|
||||
std::begin(ranges_and_values),
|
||||
std::end(ranges_and_values),
|
||||
[date](const Value<AttributeType> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
|
||||
if (val_it != std::end(ranges_and_values))
|
||||
out[row] = true;
|
||||
else
|
||||
out[row] = false;
|
||||
}
|
||||
else
|
||||
out[row] = false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::createAttributes()
|
||||
|
@ -159,6 +159,12 @@ private:
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
/// Returns a UInt8 column with one value per element of ids: 1 if the attribute's map has an
/// entry for ids[row] with a range containing dates[row], 0 otherwise.
template <typename AttributeType>
|
||||
ColumnUInt8::Ptr hasKeysImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates) const;
|
||||
|
||||
template <typename T>
|
||||
static void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const Field & value);
|
||||
|
||||
|
@ -54,7 +54,6 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
@ -154,13 +153,20 @@ public:
|
||||
String getName() const override { return name; }
|
||||
|
||||
private:
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const final { return true; }
|
||||
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
|
||||
+ ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
@ -173,8 +179,6 @@ private:
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
/** Do not require existence of the dictionary if the function is called for empty columns.
|
||||
@ -194,6 +198,24 @@ private:
|
||||
const auto key_column = key_column_with_type.column;
|
||||
const auto key_column_type = WhichDataType(key_column_with_type.type);
|
||||
|
||||
ColumnPtr range_col = nullptr;
|
||||
DataTypePtr range_col_type = nullptr;
|
||||
|
||||
/// A range dictionary is addressed by a key plus a range value, so exactly three
/// arguments are required here; the range value is the third one (arguments[2]).
if (dictionary_key_type == DictionaryKeyType::range)
{
    if (arguments.size() != 3)
        throw Exception{"Wrong argument count for function " + getName()
            + " when dictionary has key type range", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

    range_col = arguments[2].column;
    range_col_type = arguments[2].type;

    /// Accept only integer-represented types no wider than Int64.
    if (!(range_col_type->isValueRepresentedByInteger() && range_col_type->getSizeOfValueInMemory() <= sizeof(Int64)))
        throw Exception{"Illegal type " + range_col_type->getName() + " of third argument of function "
            + getName() + " must be convertible to Int64.",
            ErrorCodes::ILLEGAL_COLUMN};
}
|
||||
|
||||
if (dictionary_key_type == DictionaryKeyType::simple)
|
||||
{
|
||||
if (!key_column_type.isUInt64())
|
||||
@ -217,7 +239,7 @@ private:
|
||||
return dictionary->hasKeys(key_columns, key_types);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Has not supported for range dictionary", dictionary->getDictionaryID().getNameForLogs());
|
||||
return dictionary->hasKeys({key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type});
|
||||
}
|
||||
|
||||
mutable FunctionDictHelper helper;
|
||||
|
@ -0,0 +1,32 @@
|
||||
Dictionary not nullable
|
||||
dictGet
|
||||
0.33
|
||||
0.42
|
||||
0.46
|
||||
0.2
|
||||
0.4
|
||||
dictHas
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
select columns from dictionary
|
||||
1 2019-05-05 2019-05-20 0.33
|
||||
1 2019-05-21 2019-05-30 0.42
|
||||
2 2019-05-21 2019-05-30 0.46
|
||||
Dictionary nullable
|
||||
dictGet
|
||||
0.33
|
||||
0.42
|
||||
\N
|
||||
0.2
|
||||
0.4
|
||||
dictHas
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
select columns from dictionary
|
||||
1 2019-05-05 2019-05-20 0.33
|
||||
1 2019-05-21 2019-05-30 0.42
|
||||
2 2019-05-21 2019-05-30 \N
|
96
tests/queries/0_stateless/01676_range_hashed_dictionary.sql
Normal file
96
tests/queries/0_stateless/01676_range_hashed_dictionary.sql
Normal file
@ -0,0 +1,96 @@
|
||||
DROP DATABASE IF EXISTS database_for_range_dict;
|
||||
|
||||
CREATE DATABASE database_for_range_dict;
|
||||
|
||||
CREATE TABLE database_for_range_dict.date_table
|
||||
(
|
||||
CountryID UInt64,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Float64
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY CountryID;
|
||||
|
||||
INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
|
||||
INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
|
||||
INSERT INTO database_for_range_dict.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), 0.46);
|
||||
|
||||
-- Range-hashed dictionary loaded from database_for_range_dict.date_table.
-- Entries are keyed by CountryID and qualified by the StartDate..EndDate range.
-- Tax falls back to its DEFAULT of 0.2 when dictGet finds no matching entry.
CREATE DICTIONARY database_for_range_dict.range_dictionary
|
||||
(
|
||||
CountryID UInt64,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Float64 DEFAULT 0.2
|
||||
)
|
||||
PRIMARY KEY CountryID
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_range_dict'))
|
||||
LIFETIME(MIN 1 MAX 1000)
|
||||
LAYOUT(RANGE_HASHED())
|
||||
RANGE(MIN StartDate MAX EndDate);
|
||||
|
||||
SELECT 'Dictionary not nullable';
|
||||
SELECT 'dictGet';
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(1), toDate('2019-05-15'));
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(1), toDate('2019-05-29'));
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-29'));
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-31'));
|
||||
SELECT dictGetOrDefault('database_for_range_dict.range_dictionary', 'Tax', toUInt64(2), toDate('2019-05-31'), 0.4);
|
||||
SELECT 'dictHas';
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(1), toDate('2019-05-15'));
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(1), toDate('2019-05-29'));
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-29'));
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary', toUInt64(2), toDate('2019-05-31'));
|
||||
SELECT 'select columns from dictionary';
|
||||
SELECT * FROM database_for_range_dict.range_dictionary;
|
||||
|
||||
DROP TABLE database_for_range_dict.date_table;
|
||||
DROP DICTIONARY database_for_range_dict.range_dictionary;
|
||||
|
||||
CREATE TABLE database_for_range_dict.date_table
|
||||
(
|
||||
CountryID UInt64,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Nullable(Float64)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY CountryID;
|
||||
|
||||
INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-05'), toDate('2019-05-20'), 0.33);
|
||||
INSERT INTO database_for_range_dict.date_table VALUES(1, toDate('2019-05-21'), toDate('2019-05-30'), 0.42);
|
||||
INSERT INTO database_for_range_dict.date_table VALUES(2, toDate('2019-05-21'), toDate('2019-05-30'), NULL);
|
||||
|
||||
-- Range-hashed dictionary whose Tax attribute is Nullable(Float64), loaded from
-- database_for_range_dict.date_table. Entries are keyed by CountryID and qualified by the
-- StartDate..EndDate range. A stored NULL is returned as NULL by dictGet, while a lookup
-- with no matching entry falls back to the DEFAULT of 0.2.
CREATE DICTIONARY database_for_range_dict.range_dictionary_nullable
|
||||
(
|
||||
CountryID UInt64,
|
||||
StartDate Date,
|
||||
EndDate Date,
|
||||
Tax Nullable(Float64) DEFAULT 0.2
|
||||
)
|
||||
PRIMARY KEY CountryID
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'date_table' DB 'database_for_range_dict'))
|
||||
LIFETIME(MIN 1 MAX 1000)
|
||||
LAYOUT(RANGE_HASHED())
|
||||
RANGE(MIN StartDate MAX EndDate);
|
||||
|
||||
SELECT 'Dictionary nullable';
|
||||
SELECT 'dictGet';
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(1), toDate('2019-05-15'));
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(1), toDate('2019-05-29'));
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-29'));
|
||||
SELECT dictGet('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-31'));
|
||||
SELECT dictGetOrDefault('database_for_range_dict.range_dictionary_nullable', 'Tax', toUInt64(2), toDate('2019-05-31'), 0.4);
|
||||
SELECT 'dictHas';
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(1), toDate('2019-05-15'));
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(1), toDate('2019-05-29'));
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-29'));
|
||||
SELECT dictHas('database_for_range_dict.range_dictionary_nullable', toUInt64(2), toDate('2019-05-31'));
|
||||
SELECT 'select columns from dictionary';
|
||||
SELECT * FROM database_for_range_dict.range_dictionary_nullable;
|
||||
|
||||
DROP TABLE database_for_range_dict.date_table;
|
||||
DROP DICTIONARY database_for_range_dict.range_dictionary_nullable;
|
||||
|
||||
DROP DATABASE database_for_range_dict;
|
||||
|
Loading…
Reference in New Issue
Block a user