Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
BayoNet 2018-12-11 11:19:58 +03:00
commit 2c0c09e0b2
225 changed files with 5867 additions and 3536 deletions

View File

@ -50,7 +50,8 @@ IncludeCategories:
- Regex: '.*' - Regex: '.*'
Priority: 40 Priority: 40
ReflowComments: false ReflowComments: false
AlignEscapedNewlinesLeft: true AlignEscapedNewlinesLeft: false
AlignEscapedNewlines: DontAlign
# Not changed: # Not changed:
AccessModifierOffset: -4 AccessModifierOffset: -4

View File

@ -1,5 +1,5 @@
# This strings autochanged from release_lib.sh: # This strings autochanged from release_lib.sh:
set(VERSION_REVISION 54409 CACHE STRING "") set(VERSION_REVISION 54410 CACHE STRING "") # changed manually for tests
set(VERSION_MAJOR 18 CACHE STRING "") set(VERSION_MAJOR 18 CACHE STRING "")
set(VERSION_MINOR 14 CACHE STRING "") set(VERSION_MINOR 14 CACHE STRING "")
set(VERSION_PATCH 17 CACHE STRING "") set(VERSION_PATCH 17 CACHE STRING "")

View File

@ -43,6 +43,7 @@
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <IO/UseSSL.h> #include <IO/UseSSL.h>
#include <DataStreams/AsynchronousBlockInputStream.h> #include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <DataStreams/InternalTextLogsRowOutputStream.h> #include <DataStreams/InternalTextLogsRowOutputStream.h>
#include <Parsers/ParserQuery.h> #include <Parsers/ParserQuery.h>
#include <Parsers/ASTSetQuery.h> #include <Parsers/ASTSetQuery.h>
@ -60,6 +61,7 @@
#include <Functions/registerFunctions.h> #include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h> #include <AggregateFunctions/registerAggregateFunctions.h>
#include <Common/Config/configReadClient.h> #include <Common/Config/configReadClient.h>
#include <Storages/ColumnsDescription.h>
#if USE_READLINE #if USE_READLINE
#include "Suggest.h" // Y_IGNORE #include "Suggest.h" // Y_IGNORE
@ -69,7 +71,6 @@
#pragma GCC optimize("-fno-var-tracking-assignments") #pragma GCC optimize("-fno-var-tracking-assignments")
#endif #endif
/// http://en.wikipedia.org/wiki/ANSI_escape_code /// http://en.wikipedia.org/wiki/ANSI_escape_code
/// Similar codes \e[s, \e[u don't work in VT100 and Mosh. /// Similar codes \e[s, \e[u don't work in VT100 and Mosh.
@ -875,11 +876,12 @@ private:
/// Receive description of table structure. /// Receive description of table structure.
Block sample; Block sample;
if (receiveSampleBlock(sample)) ColumnsDescription columns_description;
if (receiveSampleBlock(sample, columns_description))
{ {
/// If structure was received (thus, server has not thrown an exception), /// If structure was received (thus, server has not thrown an exception),
/// send our data with that structure. /// send our data with that structure.
sendData(sample); sendData(sample, columns_description);
receiveEndOfQuery(); receiveEndOfQuery();
} }
} }
@ -917,7 +919,7 @@ private:
} }
void sendData(Block & sample) void sendData(Block & sample, const ColumnsDescription & columns_description)
{ {
/// If INSERT data must be sent. /// If INSERT data must be sent.
const ASTInsertQuery * parsed_insert_query = typeid_cast<const ASTInsertQuery *>(&*parsed_query); const ASTInsertQuery * parsed_insert_query = typeid_cast<const ASTInsertQuery *>(&*parsed_query);
@ -928,19 +930,19 @@ private:
{ {
/// Send data contained in the query. /// Send data contained in the query.
ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data);
sendDataFrom(data_in, sample); sendDataFrom(data_in, sample, columns_description);
} }
else if (!is_interactive) else if (!is_interactive)
{ {
/// Send data read from stdin. /// Send data read from stdin.
sendDataFrom(std_in, sample); sendDataFrom(std_in, sample, columns_description);
} }
else else
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
} }
void sendDataFrom(ReadBuffer & buf, Block & sample) void sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDescription & columns_description)
{ {
String current_format = insert_format; String current_format = insert_format;
@ -952,6 +954,10 @@ private:
BlockInputStreamPtr block_input = context.getInputFormat( BlockInputStreamPtr block_input = context.getInputFormat(
current_format, buf, sample, insert_format_max_block_size); current_format, buf, sample, insert_format_max_block_size);
const auto & column_defaults = columns_description.defaults;
if (!column_defaults.empty())
block_input = std::make_shared<AddingDefaultsBlockInputStream>(block_input, column_defaults, context);
BlockInputStreamPtr async_block_input = std::make_shared<AsynchronousBlockInputStream>(block_input); BlockInputStreamPtr async_block_input = std::make_shared<AsynchronousBlockInputStream>(block_input);
async_block_input->readPrefix(); async_block_input->readPrefix();
@ -1089,7 +1095,7 @@ private:
/// Receive the block that serves as an example of the structure of table where data will be inserted. /// Receive the block that serves as an example of the structure of table where data will be inserted.
bool receiveSampleBlock(Block & out) bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description)
{ {
while (true) while (true)
{ {
@ -1110,6 +1116,10 @@ private:
onLogData(packet.block); onLogData(packet.block);
break; break;
case Protocol::Server::TableColumns:
columns_description = ColumnsDescription::parse(packet.multistring_message[1]);
return receiveSampleBlock(out, columns_description);
default: default:
throw NetException("Unexpected packet from server (expected Data, Exception or Log, got " throw NetException("Unexpected packet from server (expected Data, Exception or Log, got "
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);

View File

@ -30,6 +30,7 @@
#include <Storages/StorageMemory.h> #include <Storages/StorageMemory.h>
#include <Storages/StorageReplicatedMergeTree.h> #include <Storages/StorageReplicatedMergeTree.h>
#include <Core/ExternalTable.h> #include <Core/ExternalTable.h>
#include <Storages/ColumnDefault.h>
#include <DataTypes/DataTypeLowCardinality.h> #include <DataTypes/DataTypeLowCardinality.h>
#include "TCPHandler.h" #include "TCPHandler.h"
@ -360,6 +361,14 @@ void TCPHandler::processInsertQuery(const Settings & global_settings)
*/ */
state.io.out->writePrefix(); state.io.out->writePrefix();
/// Send ColumnsDescription for insertion table
if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA)
{
const auto & db_and_table = query_context.getInsertionTable();
if (auto * columns = ColumnsDescription::loadFromContext(query_context, db_and_table.first, db_and_table.second))
sendTableColumns(*columns);
}
/// Send block to the client - table structure. /// Send block to the client - table structure.
Block block = state.io.out->getHeader(); Block block = state.io.out->getHeader();
@ -389,6 +398,17 @@ void TCPHandler::processOrdinaryQuery()
/// Send header-block, to allow client to prepare output format for data to send. /// Send header-block, to allow client to prepare output format for data to send.
{ {
Block header = state.io.in->getHeader(); Block header = state.io.in->getHeader();
/// Send data to old clients without low cardinality type.
if (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE)
{
for (auto & column : header)
{
column.column = recursiveRemoveLowCardinality(column.column);
column.type = recursiveRemoveLowCardinality(column.type);
}
}
if (header) if (header)
sendData(header); sendData(header);
} }
@ -860,6 +880,16 @@ void TCPHandler::sendLogData(const Block & block)
out->next(); out->next();
} }
/// Sends a TableColumns packet to the client: the packet type followed by two binary strings
/// (a table name and the textual form of `columns`). The count of two strings matches what
/// Protocol::Server::stringsInMessage() reports for TableColumns.
void TCPHandler::sendTableColumns(const ColumnsDescription & columns)
{
writeVarUInt(Protocol::Server::TableColumns, *out);
/// Send external table name (empty name is the main table)
writeStringBinary("", *out);
/// Second string: the columns description serialized with toString().
writeStringBinary(columns.toString(), *out);
out->next();
}
void TCPHandler::sendException(const Exception & e, bool with_stack_trace) void TCPHandler::sendException(const Exception & e, bool with_stack_trace)
{ {

View File

@ -144,6 +144,7 @@ private:
void sendHello(); void sendHello();
void sendData(const Block & block); /// Write a block to the network. void sendData(const Block & block); /// Write a block to the network.
void sendLogData(const Block & block); void sendLogData(const Block & block);
void sendTableColumns(const ColumnsDescription & columns);
void sendException(const Exception & e, bool with_stack_trace); void sendException(const Exception & e, bool with_stack_trace);
void sendProgress(); void sendProgress();
void sendLogs(); void sendLogs();

View File

@ -603,6 +603,10 @@ Connection::Packet Connection::receivePacket()
res.block = receiveLogData(); res.block = receiveLogData();
return res; return res;
case Protocol::Server::TableColumns:
res.multistring_message = receiveMultistringMessage(res.type);
return res;
case Protocol::Server::EndOfStream: case Protocol::Server::EndOfStream:
return res; return res;
@ -712,6 +716,16 @@ std::unique_ptr<Exception> Connection::receiveException()
} }
/// Reads the string payload of a multi-string packet of type `msg_type`.
/// The number of strings to read is taken from Protocol::Server::stringsInMessage(),
/// so packet types it does not know yield an empty vector and nothing is read from `in`.
std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)
{
    const size_t strings_count = Protocol::Server::stringsInMessage(msg_type);

    std::vector<String> strings(strings_count);
    for (String & str : strings)
        readStringBinary(str, *in);

    return strings;
}
Progress Connection::receiveProgress() Progress Connection::receiveProgress()
{ {
//LOG_TRACE(log_wrapper.get(), "Receiving progress"); //LOG_TRACE(log_wrapper.get(), "Receiving progress");

View File

@ -1,5 +1,7 @@
#pragma once #pragma once
#include <optional>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <Poco/Net/StreamSocket.h> #include <Poco/Net/StreamSocket.h>
@ -96,6 +98,7 @@ public:
Block block; Block block;
std::unique_ptr<Exception> exception; std::unique_ptr<Exception> exception;
std::vector<String> multistring_message;
Progress progress; Progress progress;
BlockStreamProfileInfo profile_info; BlockStreamProfileInfo profile_info;
@ -254,6 +257,7 @@ private:
Block receiveLogData(); Block receiveLogData();
Block receiveDataImpl(BlockInputStreamPtr & stream); Block receiveDataImpl(BlockInputStreamPtr & stream);
std::vector<String> receiveMultistringMessage(UInt64 msg_type);
std::unique_ptr<Exception> receiveException(); std::unique_ptr<Exception> receiveException();
Progress receiveProgress(); Progress receiveProgress();
BlockStreamProfileInfo receiveProfileInfo(); BlockStreamProfileInfo receiveProfileInfo();

View File

@ -142,7 +142,7 @@ struct HashTableCell
/// Deserialization, in binary and text form. /// Deserialization, in binary and text form.
void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); } void read(DB::ReadBuffer & rb) { DB::readBinary(key, rb); }
void readText(DB::ReadBuffer & rb) { DB::writeDoubleQuoted(key, rb); } void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
}; };

View File

@ -99,6 +99,13 @@ void Block::insertUnique(ColumnWithTypeAndName && elem)
} }
/// Removes the columns at all of the given positions.
/// Positions are visited from the largest to the smallest: each single-position erase
/// then leaves the remaining (smaller) positions pointing at the same columns.
void Block::erase(const std::set<size_t> & positions)
{
    auto current = positions.rbegin();
    const auto stop = positions.rend();

    while (current != stop)
    {
        erase(*current);
        ++current;
    }
}
void Block::erase(size_t position) void Block::erase(size_t position)
{ {
if (data.empty()) if (data.empty())

View File

@ -2,6 +2,7 @@
#include <vector> #include <vector>
#include <list> #include <list>
#include <set>
#include <map> #include <map>
#include <initializer_list> #include <initializer_list>
@ -51,6 +52,8 @@ public:
void insertUnique(ColumnWithTypeAndName && elem); void insertUnique(ColumnWithTypeAndName && elem);
/// remove the column at the specified position /// remove the column at the specified position
void erase(size_t position); void erase(size_t position);
/// remove the columns at the specified positions
void erase(const std::set<size_t> & positions);
/// remove the column with the specified name /// remove the column with the specified name
void erase(const String & name); void erase(const String & name);

View File

@ -58,4 +58,20 @@ void BlockInfo::read(ReadBuffer & in)
} }
} }
/// Marks the value at (column_idx, row_idx) as missing, i.e. to be replaced with the column default.
/// Creates the mask for the column on first use and grows it so that it covers row_idx.
void BlockMissingValues::setBit(size_t column_idx, size_t row_idx)
{
    RowsBitMask & mask = rows_mask_by_column_id[column_idx];

    /// Only ever grow the mask. An unconditional resize(row_idx + 1) would truncate it when a lower row
    /// is marked after a higher one, silently dropping the bits already set for the later rows.
    if (mask.size() <= row_idx)
        mask.resize(row_idx + 1);

    mask[row_idx] = true;
}
/// Returns the per-row mask recorded for the column `column_idx`.
/// When nothing was recorded for that column, a reference to a shared empty mask is returned instead.
const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const
{
    static RowsBitMask none;

    const auto found = rows_mask_by_column_id.find(column_idx);
    if (found == rows_mask_by_column_id.end())
        return none;

    return found->second;
}
} }

View File

@ -1,5 +1,7 @@
#pragma once #pragma once
#include <unordered_map>
#include <Core/Types.h> #include <Core/Types.h>
@ -43,4 +45,24 @@ struct BlockInfo
void read(ReadBuffer & in); void read(ReadBuffer & in);
}; };
/// Block extension to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults.
/// Stores, per column index, a bitmask with one bit per row.
class BlockMissingValues
{
public:
using RowsBitMask = std::vector<bool>; /// a bit per row for a column
/// Returns the mask stored for the column, or an empty mask if nothing was recorded for it.
const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;
/// Marks the value at (column_idx, row_idx) as one that should be replaced with the column default.
void setBit(size_t column_idx, size_t row_idx);
/// True if no column has a mask.
bool empty() const { return rows_mask_by_column_id.empty(); }
/// Number of columns that have a mask (not the number of rows).
size_t size() const { return rows_mask_by_column_id.size(); }
/// Drops the masks of all columns.
void clear() { rows_mask_by_column_id.clear(); }
private:
using RowsMaskByColumnId = std::unordered_map<size_t, RowsBitMask>;
/// If rows_mask_by_column_id[column_id][row_id] is true, the related value in Block should be replaced with the column default.
/// It could contain fewer columns and rows than the related block.
RowsMaskByColumnId rows_mask_by_column_id;
};
} }

View File

@ -51,6 +51,7 @@
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules /// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated). /// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54408 #define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54408
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410
#define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405 #define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405

View File

@ -69,7 +69,8 @@ namespace Protocol
Totals = 7, /// A block with totals (compressed or not). Totals = 7, /// A block with totals (compressed or not).
Extremes = 8, /// A block with minimums and maximums (compressed or not). Extremes = 8, /// A block with minimums and maximums (compressed or not).
TablesStatusResponse = 9, /// A response to TablesStatus request. TablesStatusResponse = 9, /// A response to TablesStatus request.
Log = 10 /// System logs of the query execution Log = 10, /// System logs of the query execution
TableColumns = 11, /// Columns' description for default values calculation
}; };
/// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
@ -78,11 +79,24 @@ namespace Protocol
/// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values
inline const char * toString(UInt64 packet) inline const char * toString(UInt64 packet)
{ {
static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse", "Log" }; static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals",
return packet < 11 "Extremes", "TablesStatusResponse", "Log", "TableColumns" };
return packet < 12
? data[packet] ? data[packet]
: "Unknown packet"; : "Unknown packet";
} }
/// Number of binary strings that make up the body of a multi-string server packet.
/// Only TableColumns carries such a body (two strings); every other packet type yields 0.
inline size_t stringsInMessage(UInt64 msg_type)
{
    if (msg_type == TableColumns)
        return 2;

    return 0;
}
} }
/// Packet types that client transmits. /// Packet types that client transmits.
@ -103,8 +117,8 @@ namespace Protocol
inline const char * toString(UInt64 packet) inline const char * toString(UInt64 packet)
{ {
static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest" }; static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", "KeepAlive" };
return packet < 6 return packet < 7
? data[packet] ? data[packet]
: "Unknown packet"; : "Unknown packet";
} }

View File

@ -0,0 +1,205 @@
#include <Common/typeid_cast.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/evaluateMissingDefaults.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnConst.h>
#include <Columns/FilterDescription.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int TYPE_MISMATCH;
}
/// Wraps `input` as the only child stream. The header of this stream is the header of `input`;
/// `column_defaults_` is copied, while `context_` is kept by reference.
AddingDefaultsBlockInputStream::AddingDefaultsBlockInputStream(const BlockInputStreamPtr & input,
    const ColumnDefaults & column_defaults_,
    const Context & context_)
    : header(input->getHeader())
    , column_defaults(column_defaults_)
    , context(context_)
{
    children.emplace_back(input);
}
/// Reads the next block from the child stream and, for the rows the child reported as missing
/// (via getMissingValues()), replaces the values of columns that have a default with calculated ones.
/// The block is returned unchanged if it is empty, if there are no column defaults,
/// or if the child reported no missing values.
Block AddingDefaultsBlockInputStream::readImpl()
{
Block res = children.back()->read();
if (!res)
return res;
if (column_defaults.empty())
return res;
/// Must be requested after read(): the mask describes the block that has just been read.
const BlockMissingValues & block_missing_values = children.back()->getMissingValues();
if (block_missing_values.empty())
return res;
/// Work on a copy so that `res` keeps the values that were actually read.
Block evaluate_block{res};
/// remove columns for recalculation
for (const auto & column : column_defaults)
if (evaluate_block.has(column.first))
evaluate_block.erase(column.first);
/// NOTE(review): presumably re-adds the removed columns computed from their default expressions — confirm in evaluateMissingDefaults.
evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), column_defaults, context, false);
/// Columns rebuilt by mixColumns(), keyed by their position in `res`; they are put into `res` in one pass below.
std::unordered_map<size_t, MutableColumnPtr> mixed_columns;
for (const ColumnWithTypeAndName & column_def : evaluate_block)
{
const String & column_name = column_def.name;
/// Only columns that have a default are candidates for replacement.
if (column_defaults.count(column_name) == 0)
continue;
size_t block_column_position = res.getPositionByName(column_name);
ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position);
const auto & defaults_mask = block_missing_values.getDefaultsBitmask(block_column_position);
/// Sizes and types are validated even when the mask is empty.
checkCalculated(column_read, column_def, defaults_mask.size());
if (!defaults_mask.empty())
{
/// TODO: FixedString
if (isColumnedAsNumber(column_read.type) || isDecimal(column_read.type))
{
/// Numeric-like columns are patched in place: take the column out of the block, overwrite masked rows, put it back.
MutableColumnPtr column_mixed = (*std::move(column_read.column)).mutate();
mixNumberColumns(column_read.type->getTypeId(), column_mixed, column_def.column, defaults_mask);
column_read.column = std::move(column_mixed);
}
else
{
/// Other columns are rebuilt row by row into a new column.
MutableColumnPtr column_mixed = mixColumns(column_read, column_def, defaults_mask);
mixed_columns.emplace(block_column_position, std::move(column_mixed));
}
}
}
if (!mixed_columns.empty())
{
/// replace columns saving block structure
MutableColumns mutation = res.mutateColumns();
for (size_t position = 0; position < mutation.size(); ++position)
{
auto it = mixed_columns.find(position);
if (it != mixed_columns.end())
mutation[position] = std::move(it->second);
}
res.setColumns(std::move(mutation));
}
return res;
}
/// Validates that a calculated defaults column can be mixed into the column that was read.
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH if the two columns differ in size or if the mask
/// (`defaults_needed` rows) is longer than the column, and TYPE_MISMATCH if the types differ.
void AddingDefaultsBlockInputStream::checkCalculated(const ColumnWithTypeAndName & col_read,
    const ColumnWithTypeAndName & col_defaults,
    size_t defaults_needed) const
{
    const size_t rows_read = col_read.column->size();
    const size_t rows_calculated = col_defaults.column->size();

    if (rows_read != rows_calculated)
        throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (defaults_needed > rows_read)
        throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    const bool same_type = col_read.type->equals(*col_defaults.type);
    if (!same_type)
        throw Exception("Mismach column types while adding defaults", ErrorCodes::TYPE_MISMATCH);
}
/// Overwrites, in place, the values of `column_mixed` at every row whose bit is set in `defaults_mask`
/// with the value from `column_defs` (which may be a constant column or a full column of the same kind).
/// `type_idx` selects the concrete vector/decimal column type to cast to.
/// Throws LOGICAL_ERROR if no type matched or the columns could not be cast to the selected type.
void AddingDefaultsBlockInputStream::mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & column_mixed, const ColumnPtr & column_defs,
const BlockMissingValues::RowsBitMask & defaults_mask) const
{
/// Invoked by callOnIndexAndDataType with a type pair; returns true if the columns were handled.
auto call = [&](const auto & types) -> bool
{
using Types = std::decay_t<decltype(types)>;
using DataType = typename Types::LeftType;
/// String types are excluded at compile time; for them the lambda returns false.
if constexpr (!std::is_same_v<DataType, DataTypeString> && !std::is_same_v<DataType, DataTypeFixedString>)
{
using FieldType = typename DataType::FieldType;
using ColVecType = std::conditional_t<IsDecimalNumber<FieldType>, ColumnDecimal<FieldType>, ColumnVector<FieldType>>;
auto col_read = typeid_cast<ColVecType *>(column_mixed.get());
if (!col_read)
return false;
typename ColVecType::Container & dst = col_read->getData();
if (auto const_col_defs = checkAndGetColumnConst<ColVecType>(column_defs.get()))
{
/// Constant default: one value is written to every masked row.
FieldType value = checkAndGetColumn<ColVecType>(const_col_defs->getDataColumnPtr().get())->getData()[0];
for (size_t i = 0; i < defaults_mask.size(); ++i)
if (defaults_mask[i])
dst[i] = value;
return true;
}
else if (auto col_defs = checkAndGetColumn<ColVecType>(column_defs.get()))
{
/// Full column of defaults: each masked row takes the value from the same row.
auto & src = col_defs->getData();
for (size_t i = 0; i < defaults_mask.size(); ++i)
if (defaults_mask[i])
dst[i] = src[i];
return true;
}
}
return false;
};
if (!callOnIndexAndDataType<void>(type_idx, call))
throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR);
}
/// Builds a new column row by row: rows whose bit is set in `defaults_mask` are taken from `col_defaults`,
/// all other rows (including rows past the end of the mask) are taken from `col_read`.
MutableColumnPtr AddingDefaultsBlockInputStream::mixColumns(const ColumnWithTypeAndName & col_read,
    const ColumnWithTypeAndName & col_defaults,
    const BlockMissingValues::RowsBitMask & defaults_mask) const
{
    const auto & source_read = *col_read.column;
    const auto & source_defaults = *col_defaults.column;

    const size_t total_rows = source_read.size();
    const size_t masked_rows = defaults_mask.size();

    MutableColumnPtr result = source_read.cloneEmpty();

    size_t row = 0;

    /// Rows covered by the mask: pick the source per row.
    for (; row < masked_rows; ++row)
    {
        if (!defaults_mask[row])
        {
            result->insertFrom(source_read, row);
            continue;
        }

        /// A constant column is read through operator[] and inserted as a Field.
        if (source_defaults.isColumnConst())
            result->insert(source_defaults[row]);
        else
            result->insertFrom(source_defaults, row);
    }

    /// Rows past the end of the mask are kept as read.
    for (; row < total_rows; ++row)
        result->insertFrom(source_read, row);

    return result;
}
}

View File

@ -0,0 +1,38 @@
#pragma once
#include <DataStreams/IProfilingBlockInputStream.h>
#include <Storages/ColumnDefault.h>
#include <Interpreters/Context.h>
namespace DB
{
/// Adds defaults to columns using the BlockMissingValues bitmask that the child InputStream exposes
/// through getMissingValues() for the block it has just read.
class AddingDefaultsBlockInputStream : public IProfilingBlockInputStream
{
public:
/// `input` becomes the only child; its header is used as the header of this stream.
AddingDefaultsBlockInputStream(
const BlockInputStreamPtr & input,
const ColumnDefaults & column_defaults_,
const Context & context_);
String getName() const override { return "AddingDefaults"; }
Block getHeader() const override { return header; }
protected:
Block readImpl() override;
private:
Block header;
/// Own copy of the column defaults.
const ColumnDefaults column_defaults;
/// Held by reference: the Context must outlive this stream.
const Context & context;
/// Throws if the read column and the calculated defaults column differ in size or type,
/// or if `needed` (the mask length) exceeds the column size.
void checkCalculated(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, size_t needed) const;
/// Generic path: builds a new column, taking masked rows from `col_defaults` and the rest from `col_read`.
MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults,
const BlockMissingValues::RowsBitMask & defaults_mask) const;
/// Numeric path: overwrites masked rows of `col_mixed` in place with values from `col_defaults`.
void mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & col_mixed, const ColumnPtr & col_defaults,
const BlockMissingValues::RowsBitMask & defaults_mask) const;
};
}

View File

@ -1,11 +1,11 @@
#include <DataStreams/AddingDefaultBlockInputStream.h> #include <DataStreams/AddingMissedBlockInputStream.h>
#include <Interpreters/addMissingDefaults.h> #include <Interpreters/addMissingDefaults.h>
namespace DB namespace DB
{ {
AddingDefaultBlockInputStream::AddingDefaultBlockInputStream( AddingMissedBlockInputStream::AddingMissedBlockInputStream(
const BlockInputStreamPtr & input_, const BlockInputStreamPtr & input_,
const Block & header_, const Block & header_,
const ColumnDefaults & column_defaults_, const ColumnDefaults & column_defaults_,
@ -16,7 +16,7 @@ AddingDefaultBlockInputStream::AddingDefaultBlockInputStream(
children.emplace_back(input); children.emplace_back(input);
} }
Block AddingDefaultBlockInputStream::readImpl() Block AddingMissedBlockInputStream::readImpl()
{ {
Block src = children.back()->read(); Block src = children.back()->read();
if (!src) if (!src)

View File

@ -14,16 +14,16 @@ namespace DB
* 3. Columns that materialized from other columns (materialized columns) * 3. Columns that materialized from other columns (materialized columns)
* All three types of columns are materialized (not constants). * All three types of columns are materialized (not constants).
*/ */
class AddingDefaultBlockInputStream : public IProfilingBlockInputStream class AddingMissedBlockInputStream : public IProfilingBlockInputStream
{ {
public: public:
AddingDefaultBlockInputStream( AddingMissedBlockInputStream(
const BlockInputStreamPtr & input_, const BlockInputStreamPtr & input_,
const Block & header_, const Block & header_,
const ColumnDefaults & column_defaults_, const ColumnDefaults & column_defaults_,
const Context & context_); const Context & context_);
String getName() const override { return "AddingDefault"; } String getName() const override { return "AddingMissed"; }
Block getHeader() const override { return header; } Block getHeader() const override { return header; }
private: private:

View File

@ -63,6 +63,12 @@ public:
*/ */
virtual Block read() = 0; virtual Block read() = 0;
/** Information about values of the block that should be replaced with column defaults.
  * The default implementation reports nothing: it returns a shared, default-constructed (empty) BlockMissingValues.
  */
virtual const BlockMissingValues & getMissingValues() const
{
static const BlockMissingValues none;
return none;
}
/** Read something before starting all data or after the end of all data. /** Read something before starting all data or after the end of all data.
* In the `readSuffix` function, you can implement a finalization that can lead to an exception. * In the `readSuffix` function, you can implement a finalization that can lead to an exception.
* readPrefix() must be called before the first call to read(). * readPrefix() must be called before the first call to read().

View File

@ -4,7 +4,8 @@
#include <IO/ReadBufferFromMemory.h> #include <IO/ReadBufferFromMemory.h>
#include <DataStreams/BlockIO.h> #include <DataStreams/BlockIO.h>
#include <DataStreams/InputStreamFromASTInsertQuery.h> #include <DataStreams/InputStreamFromASTInsertQuery.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Storages/ColumnsDescription.h>
namespace DB namespace DB
{ {
@ -44,6 +45,10 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers); input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers);
res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size);
auto columns_description = ColumnsDescription::loadFromContext(context, ast_insert_query->database, ast_insert_query->table);
if (columns_description && !columns_description->defaults.empty())
res_stream = std::make_shared<AddingDefaultsBlockInputStream>(res_stream, columns_description->defaults, context);
} }
} }

View File

@ -153,7 +153,8 @@ Block NativeBlockInputStream::readImpl()
column.column = std::move(read_column); column.column = std::move(read_column);
if (server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) /// Support insert from old clients without low cardinality type.
if (header && server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE)
{ {
column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type);
column.type = header.getByPosition(i).type; column.type = header.getByPosition(i).type;

View File

@ -9,6 +9,7 @@
#include <DataStreams/NativeBlockOutputStream.h> #include <DataStreams/NativeBlockOutputStream.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB namespace DB
{ {
@ -100,7 +101,14 @@ void NativeBlockOutputStream::write(const Block & block)
mark.offset_in_decompressed_block = ostr_concrete->getRemainingBytes(); mark.offset_in_decompressed_block = ostr_concrete->getRemainingBytes();
} }
const ColumnWithTypeAndName & column = block.safeGetByPosition(i); ColumnWithTypeAndName column = block.safeGetByPosition(i);
/// Send data to old clients without low cardinality type.
if (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE)
{
column.column = recursiveRemoveLowCardinality(column.column);
column.type = recursiveRemoveLowCardinality(column.type);
}
/// Name /// Name
writeStringBinary(column.name, ostr); writeStringBinary(column.name, ostr);

View File

@ -69,6 +69,9 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
{ {
if (!column)
return column;
if (from_type->equals(*to_type)) if (from_type->equals(*to_type))
return column; return column;

View File

@ -514,6 +514,13 @@ inline bool isNumber(const T & data_type)
return which.isInt() || which.isUInt() || which.isFloat(); return which.isInt() || which.isUInt() || which.isFloat();
} }
/// True for the types accepted by isNumber() (Int, UInt, Float) and additionally for Date/DateTime and UUID.
template <typename T>
inline bool isColumnedAsNumber(const T & data_type)
{
    WhichDataType which(data_type);

    if (which.isInt() || which.isUInt() || which.isFloat())
        return true;

    return which.isDateOrDateTime() || which.isUUID();
}
template <typename T> template <typename T>
inline bool isString(const T & data_type) inline bool isString(const T & data_type)
{ {

View File

@ -1,24 +1,24 @@
#include "CacheDictionary.h" #include "CacheDictionary.h"
#include <functional> #include <functional>
#include <sstream>
#include <memory> #include <memory>
#include <Columns/ColumnsNumber.h> #include <sstream>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Common/BitHelpers.h> #include <Common/BitHelpers.h>
#include <Common/randomSeed.h>
#include <Common/HashTable/Hash.h>
#include <Common/Stopwatch.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h> #include <Common/CurrentMetrics.h>
#include <Common/HashTable/Hash.h>
#include <Common/ProfileEvents.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include "DictionaryBlockInputStream.h"
#include <ext/size.h>
#include <ext/range.h>
#include <ext/map.h> #include <ext/map.h>
#include "DictionaryFactory.h" #include <ext/range.h>
#include <ext/size.h>
#include "CacheDictionary.inc.h" #include "CacheDictionary.inc.h"
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
namespace ProfileEvents namespace ProfileEvents
{ {
@ -42,7 +42,6 @@ namespace CurrentMetrics
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -61,15 +60,20 @@ inline size_t CacheDictionary::getCellIdx(const Key id) const
} }
CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, CacheDictionary::CacheDictionary(
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
const size_t size) const size_t size)
: name{name}, dict_struct(dict_struct), : name{name}
source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), , dict_struct(dict_struct)
size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, , source_ptr{std::move(source_ptr)}
size_overlap_mask{this->size - 1}, , dict_lifetime(dict_lifetime)
cells{this->size}, , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}
rnd_engine(randomSeed()) , size_overlap_mask{this->size - 1}
, cells{this->size}
, rnd_engine(randomSeed())
{ {
if (!this->source_ptr->supportsSelectiveLoad()) if (!this->source_ptr->supportsSelectiveLoad())
throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};
@ -79,7 +83,8 @@ CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStruc
CacheDictionary::CacheDictionary(const CacheDictionary & other) CacheDictionary::CacheDictionary(const CacheDictionary & other)
: CacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} : CacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size}
{} {
}
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
@ -91,15 +96,18 @@ void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<K
/// Allow to use single value in same way as array. /// Allow to use single value in same way as array.
static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & arr, const size_t idx) { return arr[idx]; } static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & arr, const size_t idx)
static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) { return value; } {
return arr[idx];
}
static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t)
{
return value;
}
template <typename AncestorType> template <typename AncestorType>
void CacheDictionary::isInImpl( void CacheDictionary::isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
const PaddedPODArray<Key> & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
/// Transform all children to parents until ancestor id or null_value will be reached. /// Transform all children to parents until ancestor id or null_value will be reached.
@ -164,25 +172,17 @@ void CacheDictionary::isInImpl(
} }
void CacheDictionary::isInVectorVector( void CacheDictionary::isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
const PaddedPODArray<Key> & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_ids, ancestor_ids, out); isInImpl(child_ids, ancestor_ids, out);
} }
void CacheDictionary::isInVectorConstant( void CacheDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
const PaddedPODArray<Key> & child_ids,
const Key ancestor_id,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_ids, ancestor_id, out); isInImpl(child_ids, ancestor_id, out);
} }
void CacheDictionary::isInConstantVector( void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
const Key child_id,
const PaddedPODArray<Key> & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
/// Special case with single child value. /// Special case with single child value.
@ -213,7 +213,8 @@ void CacheDictionary::getString(const std::string & attribute_name, const Padded
{ {
auto & attribute = getAttribute(attribute_name); auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
const auto null_value = StringRef{std::get<String>(attribute.null_values)}; const auto null_value = StringRef{std::get<String>(attribute.null_values)};
@ -221,23 +222,23 @@ void CacheDictionary::getString(const std::string & attribute_name, const Padded
} }
void CacheDictionary::getString( void CacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
ColumnString * const out) const
{ {
auto & attribute = getAttribute(attribute_name); auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); }); getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
} }
void CacheDictionary::getString( void CacheDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
ColumnString * const out) const
{ {
auto & attribute = getAttribute(attribute_name); auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; }); getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
} }
@ -329,11 +330,11 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
return; return;
std::vector<Key> required_ids(outdated_ids.size()); std::vector<Key> required_ids(outdated_ids.size());
std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
[] (auto & pair) { return pair.first; });
/// request new values /// request new values
update(required_ids, update(
required_ids,
[&](const auto id, const auto) [&](const auto id, const auto)
{ {
for (const auto row : outdated_ids[id]) for (const auto row : outdated_ids[id])
@ -413,17 +414,39 @@ void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values); break; std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values);
case AttributeUnderlyingType::UInt32: std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values); break; break;
case AttributeUnderlyingType::UInt64: std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values); break; std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values);
case AttributeUnderlyingType::Int8: std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values); break; break;
case AttributeUnderlyingType::Int16: std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values); break; std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values);
case AttributeUnderlyingType::Int64: std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values); break; break;
case AttributeUnderlyingType::Float32: std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values); break; std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values);
break;
case AttributeUnderlyingType::UInt128:
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values);
break;
case AttributeUnderlyingType::Int8:
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values);
break;
case AttributeUnderlyingType::Int16:
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values);
break;
case AttributeUnderlyingType::Int32:
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values);
break;
case AttributeUnderlyingType::Int64:
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values);
break;
case AttributeUnderlyingType::Float32:
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values);
break;
case AttributeUnderlyingType::Float64:
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values);
break;
case AttributeUnderlyingType::Decimal32: case AttributeUnderlyingType::Decimal32:
std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values); std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values);
@ -457,21 +480,49 @@ void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, co
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>(); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>(); break; std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>();
case AttributeUnderlyingType::UInt32: std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>(); break; break;
case AttributeUnderlyingType::UInt64: std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>(); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>(); break; std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>();
case AttributeUnderlyingType::Int8: std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>(); break; break;
case AttributeUnderlyingType::Int16: std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>(); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>(); break; std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>();
case AttributeUnderlyingType::Int64: std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>(); break; break;
case AttributeUnderlyingType::Float32: std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>(); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>(); break; std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>();
break;
case AttributeUnderlyingType::UInt128:
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>();
break;
case AttributeUnderlyingType::Int8:
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int16:
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int32:
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int64:
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Float32:
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>();
break;
case AttributeUnderlyingType::Float64:
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>();
break;
case AttributeUnderlyingType::Decimal32: std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>(); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>(); break; std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>();
case AttributeUnderlyingType::Decimal128: std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>(); break; break;
case AttributeUnderlyingType::Decimal64:
std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>();
break;
case AttributeUnderlyingType::Decimal128:
std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>();
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -509,8 +560,8 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a
bool CacheDictionary::isEmptyCell(const UInt64 idx) const bool CacheDictionary::isEmptyCell(const UInt64 idx) const
{ {
return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data return (idx != zero_cell_idx && cells[idx].id == 0)
== ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t())); || (cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
} }
PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
@ -537,14 +588,12 @@ BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_na
void registerDictionaryCache(DictionaryFactory & factory) void registerDictionaryCache(DictionaryFactory & factory)
{ {
auto create_layout = [=]( auto create_layout = [=](const std::string & name,
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
DictionarySourcePtr source_ptr DictionarySourcePtr source_ptr) -> DictionaryPtr
) -> DictionaryPtr { {
if (dict_struct.key) if (dict_struct.key)
throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD};
@ -565,8 +614,6 @@ void registerDictionaryCache(DictionaryFactory & factory)
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
return std::make_unique<CacheDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, size); return std::make_unique<CacheDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, size);
}; };
factory.registerLayout("cache", create_layout); factory.registerLayout("cache", create_layout);
} }

View File

@ -1,31 +1,33 @@
#pragma once #pragma once
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <Common/ArenaWithFreeLists.h>
#include <Common/CurrentMetrics.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <ext/bit_cast.h>
#include <cmath>
#include <atomic> #include <atomic>
#include <chrono> #include <chrono>
#include <vector> #include <cmath>
#include <map> #include <map>
#include <variant>
#include <pcg_random.hpp>
#include <shared_mutex> #include <shared_mutex>
#include <variant>
#include <vector>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <pcg_random.hpp>
#include <Common/ArenaWithFreeLists.h>
#include <Common/CurrentMetrics.h>
#include <ext/bit_cast.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace DB namespace DB
{ {
class CacheDictionary final : public IDictionary class CacheDictionary final : public IDictionary
{ {
public: public:
CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, CacheDictionary(
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
const size_t size); const size_t size);
CacheDictionary(const CacheDictionary & other); CacheDictionary(const CacheDictionary & other);
@ -42,16 +44,12 @@ public:
double getHitRate() const override double getHitRate() const override
{ {
return static_cast<double>(hit_count.load(std::memory_order_acquire)) / return static_cast<double>(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed);
query_count.load(std::memory_order_relaxed);
} }
size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); }
double getLoadFactor() const override double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; }
{
return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size;
}
bool isCached() const override { return true; } bool isCached() const override { return true; }
@ -63,10 +61,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; } const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
@ -77,7 +72,8 @@ public:
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override; void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; void isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override; void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
@ -106,7 +102,9 @@ public:
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE( \ void get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<TYPE> & def,\ const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const; ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -124,9 +122,9 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString( void
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
ColumnString * const out) const; const;
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const; void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
@ -146,17 +144,17 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString( void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def,
ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override; void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private: private:
template <typename Value> using ContainerType = Value[]; template <typename Value>
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>; using ContainerType = Value[];
template <typename Value>
using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
struct CellMetadata final struct CellMetadata final
{ {
@ -183,19 +181,39 @@ private:
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::variant< std::variant<
UInt8, UInt16, UInt32, UInt64, UInt8,
UInt16,
UInt32,
UInt64,
UInt128, UInt128,
Int8, Int16, Int32, Int64, Int8,
Decimal32, Decimal64, Decimal128, Int16,
Float32, Float64, Int32,
String> null_values; Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant< std::variant<
ContainerPtrType<UInt8>, ContainerPtrType<UInt16>, ContainerPtrType<UInt32>, ContainerPtrType<UInt64>, ContainerPtrType<UInt8>,
ContainerPtrType<UInt16>,
ContainerPtrType<UInt32>,
ContainerPtrType<UInt64>,
ContainerPtrType<UInt128>, ContainerPtrType<UInt128>,
ContainerPtrType<Int8>, ContainerPtrType<Int16>, ContainerPtrType<Int32>, ContainerPtrType<Int64>, ContainerPtrType<Int8>,
ContainerPtrType<Decimal32>, ContainerPtrType<Decimal64>, ContainerPtrType<Decimal128>, ContainerPtrType<Int16>,
ContainerPtrType<Float32>, ContainerPtrType<Float64>, ContainerPtrType<Int32>,
ContainerPtrType<StringRef>> arrays; ContainerPtrType<Int64>,
ContainerPtrType<Decimal32>,
ContainerPtrType<Decimal64>,
ContainerPtrType<Decimal128>,
ContainerPtrType<Float32>,
ContainerPtrType<Float64>,
ContainerPtrType<StringRef>>
arrays;
}; };
void createAttributes(); void createAttributes();
@ -205,29 +223,17 @@ private:
template <typename OutputType, typename DefaultGetter> template <typename OutputType, typename DefaultGetter>
void getItemsNumber( void getItemsNumber(
Attribute & attribute, Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename DefaultGetter>
void getItemsNumberImpl( void getItemsNumberImpl(
Attribute & attribute, Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const;
template <typename DefaultGetter> template <typename DefaultGetter>
void getItemsString( void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter && get_default) const;
template <typename PresentIdHandler, typename AbsentIdHandler> template <typename PresentIdHandler, typename AbsentIdHandler>
void update( void update(const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const;
const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated,
AbsentIdHandler && on_id_not_found) const;
PaddedPODArray<Key> getCachedIds() const; PaddedPODArray<Key> getCachedIds() const;
@ -251,10 +257,7 @@ private:
FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const;
template <typename AncestorType> template <typename AncestorType>
void isInImpl( void isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
const PaddedPODArray<Key> & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const;
const std::string name; const std::string name;
const DictionaryStructure dict_struct; const DictionaryStructure dict_struct;

View File

@ -1,11 +1,11 @@
#include "CacheDictionary.h" #include "CacheDictionary.h"
#include <ext/size.h> #include <Columns/ColumnsNumber.h>
#include <ext/map.h>
#include <ext/range.h>
#include <Common/ProfilingScopedRWLock.h> #include <Common/ProfilingScopedRWLock.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <Columns/ColumnsNumber.h> #include <ext/map.h>
#include <ext/range.h>
#include <ext/size.h>
namespace ProfileEvents namespace ProfileEvents
{ {
@ -28,7 +28,6 @@ namespace CurrentMetrics
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -36,12 +35,11 @@ namespace ErrorCodes
template <typename OutputType, typename DefaultGetter> template <typename OutputType, typename DefaultGetter>
void CacheDictionary::getItemsNumber( void CacheDictionary::getItemsNumber(
Attribute & attribute, Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const
{ {
if (false) {} if (false)
{
}
#define DISPATCH(TYPE) \ #define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) \
getItemsNumberImpl<TYPE, OutputType>(attribute, ids, out, std::forward<DefaultGetter>(get_default)); getItemsNumberImpl<TYPE, OutputType>(attribute, ids, out, std::forward<DefaultGetter>(get_default));
@ -60,16 +58,12 @@ void CacheDictionary::getItemsNumber(
DISPATCH(Decimal64) DISPATCH(Decimal64)
DISPATCH(Decimal128) DISPATCH(Decimal128)
#undef DISPATCH #undef DISPATCH
else else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
} }
template <typename AttributeType, typename OutputType, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename DefaultGetter>
void CacheDictionary::getItemsNumberImpl( void CacheDictionary::getItemsNumberImpl(
Attribute & attribute, Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const
{ {
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> } /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
std::unordered_map<Key, std::vector<size_t>> outdated_ids; std::unordered_map<Key, std::vector<size_t>> outdated_ids;
@ -122,11 +116,11 @@ void CacheDictionary::getItemsNumberImpl(
return; return;
std::vector<Key> required_ids(outdated_ids.size()); std::vector<Key> required_ids(outdated_ids.size());
std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
[] (auto & pair) { return pair.first; });
/// request new values /// request new values
update(required_ids, update(
required_ids,
[&](const auto id, const auto cell_idx) [&](const auto id, const auto cell_idx)
{ {
const auto attribute_value = attribute_array[cell_idx]; const auto attribute_value = attribute_array[cell_idx];
@ -143,10 +137,7 @@ void CacheDictionary::getItemsNumberImpl(
template <typename DefaultGetter> template <typename DefaultGetter>
void CacheDictionary::getItemsString( void CacheDictionary::getItemsString(
Attribute & attribute, Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter && get_default) const
{ {
const auto rows = ext::size(ids); const auto rows = ext::size(ids);
@ -245,10 +236,10 @@ void CacheDictionary::getItemsString(
if (!outdated_ids.empty()) if (!outdated_ids.empty())
{ {
std::vector<Key> required_ids(outdated_ids.size()); std::vector<Key> required_ids(outdated_ids.size());
std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
[] (auto & pair) { return pair.first; });
update(required_ids, update(
required_ids,
[&](const auto id, const auto cell_idx) [&](const auto id, const auto cell_idx)
{ {
const auto attribute_value = attribute_array[cell_idx]; const auto attribute_value = attribute_array[cell_idx];
@ -277,19 +268,13 @@ void CacheDictionary::getItemsString(
template <typename PresentIdHandler, typename AbsentIdHandler> template <typename PresentIdHandler, typename AbsentIdHandler>
void CacheDictionary::update( void CacheDictionary::update(
const std::vector<Key> & requested_ids, const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const
PresentIdHandler && on_cell_updated,
AbsentIdHandler && on_id_not_found) const
{ {
std::unordered_map<Key, UInt8> remaining_ids{requested_ids.size()}; std::unordered_map<Key, UInt8> remaining_ids{requested_ids.size()};
for (const auto id : requested_ids) for (const auto id : requested_ids)
remaining_ids.insert({id, 0}); remaining_ids.insert({id, 0});
std::uniform_int_distribution<UInt64> distribution std::uniform_int_distribution<UInt64> distribution{dict_lifetime.min_sec, dict_lifetime.max_sec};
{
dict_lifetime.min_sec,
dict_lifetime.max_sec
};
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
@ -310,10 +295,8 @@ void CacheDictionary::update(
const auto & ids = id_column->getData(); const auto & ids = id_column->getData();
/// cache column pointers /// cache column pointers
const auto column_ptrs = ext::map<std::vector>(ext::range(0, attributes.size()), [&block] (size_t i) const auto column_ptrs = ext::map<std::vector>(
{ ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); });
return block.safeGetByPosition(i + 1).column.get();
});
for (const auto i : ext::range(0, ids.size())) for (const auto i : ext::range(0, ids.size()))
{ {

View File

@ -1,21 +1,20 @@
#include "ClickHouseDictionarySource.h" #include "ClickHouseDictionarySource.h"
#include "ExternalQueryBuilder.h" #include <memory>
#include "writeParenthesisedString.h"
#include <Client/ConnectionPool.h> #include <Client/ConnectionPool.h>
#include <DataStreams/RemoteBlockInputStream.h> #include <DataStreams/RemoteBlockInputStream.h>
#include "readInvalidateQuery.h" #include <IO/ConnectionTimeouts.h>
#include <Interpreters/executeQuery.h> #include <Interpreters/executeQuery.h>
#include <Common/isLocalAddress.h> #include <Common/isLocalAddress.h>
#include <memory>
#include <ext/range.h> #include <ext/range.h>
#include <IO/ConnectionTimeouts.h>
#include "DictionarySourceFactory.h" #include "DictionarySourceFactory.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "ExternalQueryBuilder.h"
#include "readInvalidateQuery.h"
#include "writeParenthesisedString.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNSUPPORTED_METHOD; extern const int UNSUPPORTED_METHOD;
@ -25,13 +24,25 @@ namespace ErrorCodes
static const size_t MAX_CONNECTIONS = 16; static const size_t MAX_CONNECTIONS = 16;
static ConnectionPoolWithFailoverPtr createPool( static ConnectionPoolWithFailoverPtr createPool(
const std::string & host, UInt16 port, bool secure, const std::string & db, const std::string & host,
const std::string & user, const std::string & password, const Context & context) UInt16 port,
bool secure,
const std::string & db,
const std::string & user,
const std::string & password,
const Context & context)
{ {
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(context.getSettingsRef()); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(context.getSettingsRef());
ConnectionPoolPtrs pools; ConnectionPoolPtrs pools;
pools.emplace_back(std::make_shared<ConnectionPool>( pools.emplace_back(std::make_shared<ConnectionPool>(
MAX_CONNECTIONS, host, port, db, user, password, timeouts, "ClickHouseDictionarySource", MAX_CONNECTIONS,
host,
port,
db,
user,
password,
timeouts,
"ClickHouseDictionarySource",
Protocol::Compression::Enable, Protocol::Compression::Enable,
secure ? Protocol::Secure::Enable : Protocol::Secure::Disable)); secure ? Protocol::Secure::Enable : Protocol::Secure::Disable));
return std::make_shared<ConnectionPoolWithFailover>(pools, LoadBalancing::RANDOM); return std::make_shared<ConnectionPoolWithFailover>(pools, LoadBalancing::RANDOM);
@ -42,44 +53,52 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
const DictionaryStructure & dict_struct_, const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
const Block & sample_block, Context & context) const Block & sample_block,
: update_time{std::chrono::system_clock::from_time_t(0)}, Context & context)
dict_struct{dict_struct_}, : update_time{std::chrono::system_clock::from_time_t(0)}
host{config.getString(config_prefix + ".host")}, , dict_struct{dict_struct_}
port(config.getInt(config_prefix + ".port")), , host{config.getString(config_prefix + ".host")}
secure(config.getBool(config_prefix + ".secure", false)), , port(config.getInt(config_prefix + ".port"))
user{config.getString(config_prefix + ".user", "")}, , secure(config.getBool(config_prefix + ".secure", false))
password{config.getString(config_prefix + ".password", "")}, , user{config.getString(config_prefix + ".user", "")}
db{config.getString(config_prefix + ".db", "")}, , password{config.getString(config_prefix + ".password", "")}
table{config.getString(config_prefix + ".table")}, , db{config.getString(config_prefix + ".db", "")}
where{config.getString(config_prefix + ".where", "")}, , table{config.getString(config_prefix + ".table")}
update_field{config.getString(config_prefix + ".update_field", "")}, , where{config.getString(config_prefix + ".where", "")}
invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}, , update_field{config.getString(config_prefix + ".update_field", "")}
query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
sample_block{sample_block}, context(context), , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}
is_local{isLocalAddress({ host, port }, config.getInt("tcp_port", 0))}, , sample_block{sample_block}
pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}, , context(context)
load_all_query{query_builder.composeLoadAllQuery()} , is_local{isLocalAddress({host, port}, config.getInt("tcp_port", 0))}
{} , pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}
, load_all_query{query_builder.composeLoadAllQuery()}
{
}
ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other) ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other)
: update_time{other.update_time}, : update_time{other.update_time}
dict_struct{other.dict_struct}, , dict_struct{other.dict_struct}
host{other.host}, port{other.port}, , host{other.host}
secure{other.secure}, , port{other.port}
user{other.user}, password{other.password}, , secure{other.secure}
db{other.db}, table{other.table}, , user{other.user}
where{other.where}, , password{other.password}
update_field{other.update_field}, , db{other.db}
invalidate_query{other.invalidate_query}, , table{other.table}
invalidate_query_response{other.invalidate_query_response}, , where{other.where}
query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, , update_field{other.update_field}
sample_block{other.sample_block}, context(other.context), , invalidate_query{other.invalidate_query}
is_local{other.is_local}, , invalidate_query_response{other.invalidate_query_response}
pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}, , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}
load_all_query{other.load_all_query} , sample_block{other.sample_block}
{} , context(other.context)
, is_local{other.is_local}
, pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}
, load_all_query{other.load_all_query}
{
}
std::string ClickHouseDictionarySource::getUpdateFieldAndDate() std::string ClickHouseDictionarySource::getUpdateFieldAndDate()
{ {
@ -119,17 +138,14 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadUpdatedAll()
BlockInputStreamPtr ClickHouseDictionarySource::loadIds(const std::vector<UInt64> & ids) BlockInputStreamPtr ClickHouseDictionarySource::loadIds(const std::vector<UInt64> & ids)
{ {
return createStreamForSelectiveLoad( return createStreamForSelectiveLoad(query_builder.composeLoadIdsQuery(ids));
query_builder.composeLoadIdsQuery(ids));
} }
BlockInputStreamPtr ClickHouseDictionarySource::loadKeys( BlockInputStreamPtr ClickHouseDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
const Columns & key_columns, const std::vector<size_t> & requested_rows)
{ {
return createStreamForSelectiveLoad( return createStreamForSelectiveLoad(
query_builder.composeLoadKeysQuery( query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES));
key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES));
} }
bool ClickHouseDictionarySource::isModified() const bool ClickHouseDictionarySource::isModified() const

View File

@ -1,15 +1,14 @@
#pragma once #pragma once
#include "IDictionarySource.h" #include <memory>
#include <Client/ConnectionPoolWithFailover.h>
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "ExternalQueryBuilder.h" #include "ExternalQueryBuilder.h"
#include <Client/ConnectionPoolWithFailover.h> #include "IDictionarySource.h"
#include <memory>
namespace DB namespace DB
{ {
/** Allows loading dictionaries from local or remote ClickHouse instance /** Allows loading dictionaries from local or remote ClickHouse instance
* @todo use ConnectionPoolWithFailover * @todo use ConnectionPoolWithFailover
* @todo invent a way to keep track of source modifications * @todo invent a way to keep track of source modifications
@ -17,10 +16,12 @@ namespace DB
class ClickHouseDictionarySource final : public IDictionarySource class ClickHouseDictionarySource final : public IDictionarySource
{ {
public: public:
ClickHouseDictionarySource(const DictionaryStructure & dict_struct_, ClickHouseDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
const Block & sample_block, Context & context); const Block & sample_block,
Context & context);
/// copy-constructor is provided in order to support cloneability /// copy-constructor is provided in order to support cloneability
ClickHouseDictionarySource(const ClickHouseDictionarySource & other); ClickHouseDictionarySource(const ClickHouseDictionarySource & other);
@ -31,8 +32,7 @@ public:
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override; BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys( BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
bool isModified() const override; bool isModified() const override;
bool supportsSelectiveLoad() const override { return true; } bool supportsSelectiveLoad() const override { return true; }

View File

@ -1,20 +1,19 @@
#include "ComplexKeyCacheDictionary.h" #include "ComplexKeyCacheDictionary.h"
#include "DictionaryBlockInputStream.h"
#include <Common/Arena.h> #include <Common/Arena.h>
#include <Common/BitHelpers.h> #include <Common/BitHelpers.h>
#include <Common/randomSeed.h>
#include <Common/Stopwatch.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h> #include <Common/CurrentMetrics.h>
#include <ext/range.h> #include <Common/ProfileEvents.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>
#include <ext/map.h> #include <ext/map.h>
#include <ext/range.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h" #include "DictionaryFactory.h"
namespace ProfileEvents namespace ProfileEvents
{ {
extern const Event DictCacheKeysRequested; extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss; extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound; extern const Event DictCacheKeysRequestedFound;
@ -34,7 +33,6 @@ namespace CurrentMetrics
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -52,13 +50,19 @@ inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const
} }
ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
const size_t size) const size_t size)
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), : name{name}
size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, , dict_struct(dict_struct)
size_overlap_mask{this->size - 1}, , source_ptr{std::move(source_ptr)}
rnd_engine(randomSeed()) , dict_lifetime(dict_lifetime)
, size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}
, size_overlap_mask{this->size - 1}
, rnd_engine(randomSeed())
{ {
if (!this->source_ptr->supportsSelectiveLoad()) if (!this->source_ptr->supportsSelectiveLoad())
throw Exception{name + ": source cannot be used with ComplexKeyCacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{name + ": source cannot be used with ComplexKeyCacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};
@ -68,17 +72,18 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, c
ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other) ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other)
: ComplexKeyCacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} : ComplexKeyCacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size}
{} {
}
void ComplexKeyCacheDictionary::getString( void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
ColumnString * out) const
{ {
dict_struct.validateKeyTypes(key_types); dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name); auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
const auto null_value = StringRef{std::get<String>(attribute.null_values)}; const auto null_value = StringRef{std::get<String>(attribute.null_values)};
@ -86,27 +91,35 @@ void ComplexKeyCacheDictionary::getString(
} }
void ComplexKeyCacheDictionary::getString( void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const ColumnString * const def, ColumnString * const out) const const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
{ {
dict_struct.validateKeyTypes(key_types); dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name); auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); }); getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
} }
void ComplexKeyCacheDictionary::getString( void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const String & def, ColumnString * const out) const const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{ {
dict_struct.validateKeyTypes(key_types); dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name); auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; }); getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
} }
@ -118,7 +131,8 @@ void ComplexKeyCacheDictionary::getString(
/// true true impossible /// true true impossible
/// ///
/// todo: split this func to two: find_for_get and find_for_set /// todo: split this func to two: find_for_get and find_for_set
ComplexKeyCacheDictionary::FindResult ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const ComplexKeyCacheDictionary::FindResult
ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const
{ {
auto pos = hash; auto pos = hash;
auto oldest_id = pos; auto oldest_id = pos;
@ -211,11 +225,14 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
return; return;
std::vector<size_t> required_rows(outdated_keys.size()); std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), std::transform(
[] (auto & pair) { return pair.second.front(); }); std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
/// request new values /// request new values
update(key_columns, keys_array, required_rows, update(
key_columns,
keys_array,
required_rows,
[&](const StringRef key, const auto) [&](const StringRef key, const auto)
{ {
for (const auto out_idx : outdated_keys[key]) for (const auto out_idx : outdated_keys[key])
@ -242,7 +259,8 @@ void ComplexKeyCacheDictionary::createAttributes()
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
ErrorCodes::TYPE_MISMATCH};
} }
} }
@ -273,8 +291,7 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
template <typename Pool> template <typename Pool>
StringRef ComplexKeyCacheDictionary::placeKeysInPool( StringRef ComplexKeyCacheDictionary::placeKeysInPool(
const size_t row, const Columns & key_columns, StringRefs & keys, const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool)
const std::vector<DictionaryAttribute> & key_attributes, Pool & pool)
{ {
const auto keys_size = key_columns.size(); const auto keys_size = key_columns.size();
size_t sum_keys_size{}; size_t sum_keys_size{};
@ -319,16 +336,21 @@ StringRef ComplexKeyCacheDictionary::placeKeysInPool(
/// Explicit instantiations. /// Explicit instantiations.
template StringRef ComplexKeyCacheDictionary::placeKeysInPool<Arena>( template StringRef ComplexKeyCacheDictionary::placeKeysInPool<Arena>(
const size_t row, const Columns & key_columns, StringRefs & keys, const size_t row,
const std::vector<DictionaryAttribute> & key_attributes, Arena & pool); const Columns & key_columns,
StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes,
Arena & pool);
template StringRef ComplexKeyCacheDictionary::placeKeysInPool<ArenaWithFreeLists>( template StringRef ComplexKeyCacheDictionary::placeKeysInPool<ArenaWithFreeLists>(
const size_t row, const Columns & key_columns, StringRefs & keys, const size_t row,
const std::vector<DictionaryAttribute> & key_attributes, ArenaWithFreeLists & pool); const Columns & key_columns,
StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes,
ArenaWithFreeLists & pool);
StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(const size_t row, const Columns & key_columns) const
const size_t row, const Columns & key_columns) const
{ {
const auto res = fixed_size_keys_pool->alloc(); const auto res = fixed_size_keys_pool->alloc();
auto place = res; auto place = res;
@ -360,8 +382,9 @@ StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const
bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const
{ {
return (cells[idx].key == StringRef{} && (idx != zero_cell_idx return (
|| cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()))); cells[idx].key == StringRef{}
&& (idx != zero_cell_idx || cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t())));
} }
BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
@ -371,8 +394,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
for (auto idx : ext::range(0, cells.size())) for (auto idx : ext::range(0, cells.size()))
if (!isEmptyCell(idx) if (!isEmptyCell(idx) && !cells[idx].isDefault())
&& !cells[idx].isDefault())
keys.push_back(cells[idx].key); keys.push_back(cells[idx].key);
} }
@ -382,13 +404,12 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
void registerDictionaryComplexKeyCache(DictionaryFactory & factory) void registerDictionaryComplexKeyCache(DictionaryFactory & factory)
{ {
auto create_layout = [=]( auto create_layout = [=](const std::string & name,
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
DictionarySourcePtr source_ptr DictionarySourcePtr source_ptr) -> DictionaryPtr
) -> DictionaryPtr { {
if (!dict_struct.key) if (!dict_struct.key)
throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS};
const auto & layout_prefix = config_prefix + ".layout"; const auto & layout_prefix = config_prefix + ".layout";

View File

@ -3,23 +3,23 @@
#include <atomic> #include <atomic>
#include <chrono> #include <chrono>
#include <map> #include <map>
#include <shared_mutex>
#include <variant> #include <variant>
#include <vector> #include <vector>
#include <shared_mutex>
#include <Columns/ColumnDecimal.h> #include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <pcg_random.hpp>
#include <Common/ArenaWithFreeLists.h> #include <Common/ArenaWithFreeLists.h>
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/ProfilingScopedRWLock.h> #include <Common/ProfilingScopedRWLock.h>
#include <Common/SmallObjectPool.h> #include <Common/SmallObjectPool.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <common/StringRef.h> #include <common/StringRef.h>
#include <ext/bit_cast.h> #include <ext/bit_cast.h>
#include <ext/map.h> #include <ext/map.h>
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include <pcg_random.hpp> #include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace ProfileEvents namespace ProfileEvents
@ -40,7 +40,8 @@ namespace DB
class ComplexKeyCacheDictionary final : public IDictionaryBase class ComplexKeyCacheDictionary final : public IDictionaryBase
{ {
public: public:
ComplexKeyCacheDictionary(const std::string & name, ComplexKeyCacheDictionary(
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr, DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime, const DictionaryLifetime dict_lifetime,
@ -48,25 +49,13 @@ public:
ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other); ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other);
std::string getKeyDescription() const std::string getKeyDescription() const { return key_description; }
{
return key_description;
}
std::exception_ptr getCreationException() const override std::exception_ptr getCreationException() const override { return {}; }
{
return {};
}
std::string getName() const override std::string getName() const override { return name; }
{
return name;
}
std::string getTypeName() const override std::string getTypeName() const override { return "ComplexKeyCache"; }
{
return "ComplexKeyCache";
}
size_t getBytesAllocated() const override size_t getBytesAllocated() const override
{ {
@ -74,55 +63,28 @@ public:
+ (string_arena ? string_arena->size() : 0); + (string_arena ? string_arena->size() : 0);
} }
size_t getQueryCount() const override size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
{
return query_count.load(std::memory_order_relaxed);
}
double getHitRate() const override double getHitRate() const override
{ {
return static_cast<double>(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); return static_cast<double>(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed);
} }
size_t getElementCount() const override size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); }
{
return element_count.load(std::memory_order_relaxed);
}
double getLoadFactor() const override double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; }
{
return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size;
}
bool isCached() const override bool isCached() const override { return true; }
{
return true;
}
std::unique_ptr<IExternalLoadable> clone() const override std::unique_ptr<IExternalLoadable> clone() const override { return std::make_unique<ComplexKeyCacheDictionary>(*this); }
{
return std::make_unique<ComplexKeyCacheDictionary>(*this);
}
const IDictionarySource * getSource() const override const IDictionarySource * getSource() const override { return source_ptr.get(); }
{
return source_ptr.get();
}
const DictionaryLifetime & getLifetime() const override const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
{
return dict_lifetime;
}
const DictionaryStructure & getStructure() const override const DictionaryStructure & getStructure() const override { return dict_struct; }
{
return dict_struct;
}
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
@ -156,7 +118,8 @@ public:
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const; void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, \ void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \ const Columns & key_columns, \
const DataTypes & key_types, \ const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \ const PaddedPODArray<TYPE> & def, \
@ -177,14 +140,16 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString(const std::string & attribute_name, void getString(
const std::string & attribute_name,
const Columns & key_columns, const Columns & key_columns,
const DataTypes & key_types, const DataTypes & key_types,
const ColumnString * const def, const ColumnString * const def,
ColumnString * const out) const; ColumnString * const out) const;
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, \ void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \ const Columns & key_columns, \
const DataTypes & key_types, \ const DataTypes & key_types, \
const TYPE def, \ const TYPE def, \
@ -205,7 +170,8 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString(const std::string & attribute_name, void getString(
const std::string & attribute_name,
const Columns & key_columns, const Columns & key_columns,
const DataTypes & key_types, const DataTypes & key_types,
const String & def, const String & def,
@ -216,9 +182,12 @@ public:
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private: private:
template <typename Value> using MapType = HashMapWithSavedHash<StringRef, Value, StringRefHash>; template <typename Value>
template <typename Value> using ContainerType = Value[]; using MapType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>; template <typename Value>
using ContainerType = Value[];
template <typename Value>
using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
struct CellMetadata final struct CellMetadata final
{ {
@ -235,32 +204,35 @@ private:
time_point_urep_t data; time_point_urep_t data;
/// Sets expiration time, resets `is_default` flag to false /// Sets expiration time, resets `is_default` flag to false
time_point_t expiresAt() const time_point_t expiresAt() const { return ext::safe_bit_cast<time_point_t>(data & EXPIRES_AT_MASK); }
{ void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast<time_point_urep_t>(t); }
return ext::safe_bit_cast<time_point_t>(data & EXPIRES_AT_MASK);
}
void setExpiresAt(const time_point_t & t)
{
data = ext::safe_bit_cast<time_point_urep_t>(t);
}
bool isDefault() const bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; }
{ void setDefault() { data |= IS_DEFAULT_MASK; }
return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK;
}
void setDefault()
{
data |= IS_DEFAULT_MASK;
}
}; };
struct Attribute final struct Attribute final
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::variant<UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, std::variant<
Decimal32, Decimal64, Decimal128, UInt8,
Float32, Float64, String> null_values; UInt16,
std::variant<ContainerPtrType<UInt8>, UInt32,
UInt64,
UInt128,
Int8,
Int16,
Int32,
Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant<
ContainerPtrType<UInt8>,
ContainerPtrType<UInt16>, ContainerPtrType<UInt16>,
ContainerPtrType<UInt32>, ContainerPtrType<UInt32>,
ContainerPtrType<UInt64>, ContainerPtrType<UInt64>,
@ -283,8 +255,8 @@ private:
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
template <typename OutputType, typename DefaultGetter> template <typename OutputType, typename DefaultGetter>
void getItemsNumber( void
Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const getItemsNumber(Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
{ {
if (false) if (false)
{ {
@ -372,7 +344,8 @@ private:
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); }); std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
/// request new values /// request new values
update(key_columns, update(
key_columns,
keys_array, keys_array,
required_rows, required_rows,
[&](const StringRef key, const size_t cell_idx) [&](const StringRef key, const size_t cell_idx)
@ -497,7 +470,8 @@ private:
return pair.second.front(); return pair.second.front();
}); });
update(key_columns, update(
key_columns,
keys_array, keys_array,
required_rows, required_rows,
[&](const StringRef key, const size_t cell_idx) [&](const StringRef key, const size_t cell_idx)
@ -531,7 +505,8 @@ private:
} }
template <typename PresentKeyHandler, typename AbsentKeyHandler> template <typename PresentKeyHandler, typename AbsentKeyHandler>
void update(const Columns & in_key_columns, void update(
const Columns & in_key_columns,
const PODArray<StringRef> & in_keys, const PODArray<StringRef> & in_keys,
const std::vector<size_t> & in_requested_rows, const std::vector<size_t> & in_requested_rows,
PresentKeyHandler && on_cell_updated, PresentKeyHandler && on_cell_updated,
@ -561,8 +536,10 @@ private:
const auto key_columns = ext::map<Columns>( const auto key_columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
[&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; }); {
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
const auto rows_num = block.rows(); const auto rows_num = block.rows();
@ -693,7 +670,8 @@ private:
void freeKey(const StringRef key) const; void freeKey(const StringRef key) const;
template <typename Arena> template <typename Arena>
static StringRef placeKeysInPool(const size_t row, static StringRef placeKeysInPool(
const size_t row,
const Columns & key_columns, const Columns & key_columns,
StringRefs & keys, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, const std::vector<DictionaryAttribute> & key_attributes,

View File

@ -2,8 +2,8 @@
namespace DB namespace DB
{ {
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::Attribute ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{ {
Attribute attr{type, {}, {}}; Attribute attr{type, {}, {}};

View File

@ -2,26 +2,53 @@
namespace DB namespace DB
{ {
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>(); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>(); break; std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>();
case AttributeUnderlyingType::UInt32: std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>(); break; break;
case AttributeUnderlyingType::UInt64: std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>(); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>(); break; std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>();
case AttributeUnderlyingType::Int8: std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>(); break; break;
case AttributeUnderlyingType::Int16: std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>(); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>(); break; std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>();
case AttributeUnderlyingType::Int64: std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>(); break; break;
case AttributeUnderlyingType::Float32: std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>(); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>(); break; std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>();
break;
case AttributeUnderlyingType::UInt128:
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>();
break;
case AttributeUnderlyingType::Int8:
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int16:
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int32:
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int64:
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Float32:
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>();
break;
case AttributeUnderlyingType::Float64:
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>();
break;
case AttributeUnderlyingType::Decimal32: std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>(); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>(); break; std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>();
case AttributeUnderlyingType::Decimal128: std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>(); break; break;
case AttributeUnderlyingType::Decimal64:
std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>();
break;
case AttributeUnderlyingType::Decimal128:
std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>();
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {

View File

@ -2,22 +2,43 @@
namespace DB namespace DB
{ {
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values); break; std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values);
case AttributeUnderlyingType::UInt32: std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values); break; break;
case AttributeUnderlyingType::UInt64: std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values); break; std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values);
case AttributeUnderlyingType::Int8: std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values); break; break;
case AttributeUnderlyingType::Int16: std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values); break; std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values);
case AttributeUnderlyingType::Int64: std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values); break; break;
case AttributeUnderlyingType::Float32: std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values); break; std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values);
break;
case AttributeUnderlyingType::UInt128:
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values);
break;
case AttributeUnderlyingType::Int8:
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values);
break;
case AttributeUnderlyingType::Int16:
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values);
break;
case AttributeUnderlyingType::Int32:
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values);
break;
case AttributeUnderlyingType::Int64:
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values);
break;
case AttributeUnderlyingType::Float32:
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values);
break;
case AttributeUnderlyingType::Float64:
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values);
break;
case AttributeUnderlyingType::Decimal32: case AttributeUnderlyingType::Decimal32:
std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values); std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values);

View File

@ -1,12 +1,11 @@
#include "ComplexKeyHashedDictionary.h"
#include <ext/map.h> #include <ext/map.h>
#include <ext/range.h> #include <ext/range.h>
#include "ComplexKeyHashedDictionary.h"
#include "DictionaryBlockInputStream.h" #include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h" #include "DictionaryFactory.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -16,12 +15,19 @@ namespace ErrorCodes
} }
ComplexKeyHashedDictionary::ComplexKeyHashedDictionary( ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const std::string & name,
const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block) const DictionaryStructure & dict_struct,
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), DictionarySourcePtr source_ptr,
require_nonempty(require_nonempty), saved_block{std::move(saved_block)} const DictionaryLifetime dict_lifetime,
bool require_nonempty,
BlockPtr saved_block)
: name{name}
, dict_struct(dict_struct)
, source_ptr{std::move(source_ptr)}
, dict_lifetime(dict_lifetime)
, require_nonempty(require_nonempty)
, saved_block{std::move(saved_block)}
{ {
createAttributes(); createAttributes();
try try
@ -38,24 +44,27 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(
} }
ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other) ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other)
: ComplexKeyHashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block} : ComplexKeyHashedDictionary{
other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block}
{ {
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \ void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
ResultArrayType<TYPE> & out) const\
{ \ { \
dict_struct.validateKeyTypes(key_types); \ dict_struct.validateKeyTypes(key_types); \
\ \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
const auto null_value = std::get<TYPE>(attribute.null_values); \ const auto null_value = std::get<TYPE>(attribute.null_values); \
\ \
getItemsNumber<TYPE>(attribute, key_columns,\ getItemsNumber<TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \ [&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \ [&](const size_t) { return null_value; }); \
} }
@ -76,34 +85,42 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void ComplexKeyHashedDictionary::getString( void ComplexKeyHashedDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
ColumnString * out) const
{ {
dict_struct.validateKeyTypes(key_types); dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
const auto & null_value = StringRef{std::get<String>(attribute.null_values)}; const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(attribute, key_columns, getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; }); [&](const size_t) { return null_value; });
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \ void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, \
const PaddedPODArray<TYPE> & def, ResultArrayType<TYPE> & out) const\ const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \ { \
dict_struct.validateKeyTypes(key_types); \ dict_struct.validateKeyTypes(key_types); \
\ \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, key_columns,\ getItemsNumber<TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \ [&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \ [&](const size_t row) { return def[row]; }); \
} }
@ -124,34 +141,43 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void ComplexKeyHashedDictionary::getString( void ComplexKeyHashedDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const ColumnString * const def, ColumnString * const out) const const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
{ {
dict_struct.validateKeyTypes(key_types); dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, key_columns, getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); }); [&](const size_t row) { return def->getDataAt(row); });
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void ComplexKeyHashedDictionary::get##TYPE( \ void ComplexKeyHashedDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, \
const TYPE def, ResultArrayType<TYPE> & out) const\ const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \ { \
dict_struct.validateKeyTypes(key_types); \ dict_struct.validateKeyTypes(key_types); \
\ \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, key_columns,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
[&] (const size_t) { return def; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -170,16 +196,22 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void ComplexKeyHashedDictionary::getString( void ComplexKeyHashedDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const String & def, ColumnString * const out) const const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{ {
dict_struct.validateKeyTypes(key_types); dict_struct.validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, key_columns, getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; }); [&](const size_t) { return StringRef{def}; });
} }
@ -192,22 +224,52 @@ void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataType
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: has<UInt8>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: has<UInt16>(attribute, key_columns, out); break; has<UInt8>(attribute, key_columns, out);
case AttributeUnderlyingType::UInt32: has<UInt32>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::UInt64: has<UInt64>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: has<UInt128>(attribute, key_columns, out); break; has<UInt16>(attribute, key_columns, out);
case AttributeUnderlyingType::Int8: has<Int8>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::Int16: has<Int16>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: has<Int32>(attribute, key_columns, out); break; has<UInt32>(attribute, key_columns, out);
case AttributeUnderlyingType::Int64: has<Int64>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::Float32: has<Float32>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: has<Float64>(attribute, key_columns, out); break; has<UInt64>(attribute, key_columns, out);
case AttributeUnderlyingType::String: has<StringRef>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::UInt128:
has<UInt128>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int8:
has<Int8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int16:
has<Int16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int32:
has<Int32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int64:
has<Int64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Float32:
has<Float32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Float64:
has<Float64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::String:
has<StringRef>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Decimal32: has<Decimal32>(attribute, key_columns, out); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: has<Decimal64>(attribute, key_columns, out); break; has<Decimal32>(attribute, key_columns, out);
case AttributeUnderlyingType::Decimal128: has<Decimal128>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::Decimal64:
has<Decimal64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Decimal128:
has<Decimal128>(attribute, key_columns, out);
break;
} }
} }
@ -222,7 +284,8 @@ void ComplexKeyHashedDictionary::createAttributes()
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
ErrorCodes::TYPE_MISMATCH};
} }
} }
@ -236,14 +299,10 @@ void ComplexKeyHashedDictionary::blockToAttributes(const Block & block)
const auto rows = block.rows(); const auto rows = block.rows();
element_count += rows; element_count += rows;
const auto key_column_ptrs = ext::map<Columns>(ext::range(0, keys_size), const auto key_column_ptrs = ext::map<Columns>(
[&](const size_t attribute_idx) ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
{
return block.safeGetByPosition(attribute_idx).column;
});
const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
[&](const size_t attribute_idx)
{ {
return block.safeGetByPosition(keys_size + attribute_idx).column; return block.safeGetByPosition(keys_size + attribute_idx).column;
}); });
@ -304,15 +363,11 @@ void ComplexKeyHashedDictionary::updateData()
stream->readPrefix(); stream->readPrefix();
while (Block block = stream->read()) while (Block block = stream->read())
{ {
const auto saved_key_column_ptrs = ext::map<Columns>(ext::range(0, keys_size), [&](const size_t key_idx) const auto saved_key_column_ptrs = ext::map<Columns>(
{ ext::range(0, keys_size), [&](const size_t key_idx) { return saved_block->safeGetByPosition(key_idx).column; });
return saved_block->safeGetByPosition(key_idx).column;
});
const auto update_key_column_ptrs = ext::map<Columns>(ext::range(0, keys_size), [&](const size_t key_idx) const auto update_key_column_ptrs = ext::map<Columns>(
{ ext::range(0, keys_size), [&](const size_t key_idx) { return block.safeGetByPosition(key_idx).column; });
return block.safeGetByPosition(key_idx).column;
});
Arena temp_key_pool; Arena temp_key_pool;
ContainerType<std::vector<size_t>> update_key_hash; ContainerType<std::vector<size_t>> update_key_hash;
@ -389,21 +444,49 @@ void ComplexKeyHashedDictionary::calculateBytesAllocated()
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: addAttributeSize<UInt8>(attribute); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: addAttributeSize<UInt16>(attribute); break; addAttributeSize<UInt8>(attribute);
case AttributeUnderlyingType::UInt32: addAttributeSize<UInt32>(attribute); break; break;
case AttributeUnderlyingType::UInt64: addAttributeSize<UInt64>(attribute); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: addAttributeSize<UInt128>(attribute); break; addAttributeSize<UInt16>(attribute);
case AttributeUnderlyingType::Int8: addAttributeSize<Int8>(attribute); break; break;
case AttributeUnderlyingType::Int16: addAttributeSize<Int16>(attribute); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: addAttributeSize<Int32>(attribute); break; addAttributeSize<UInt32>(attribute);
case AttributeUnderlyingType::Int64: addAttributeSize<Int64>(attribute); break; break;
case AttributeUnderlyingType::Float32: addAttributeSize<Float32>(attribute); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: addAttributeSize<Float64>(attribute); break; addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::UInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::Int8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::Int16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::Int32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::Int64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::Float32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::Float64:
addAttributeSize<Float64>(attribute);
break;
case AttributeUnderlyingType::Decimal32: addAttributeSize<Decimal32>(attribute); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: addAttributeSize<Decimal64>(attribute); break; addAttributeSize<Decimal32>(attribute);
case AttributeUnderlyingType::Decimal128: addAttributeSize<Decimal128>(attribute); break; break;
case AttributeUnderlyingType::Decimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::Decimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -425,27 +508,56 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons
attribute.maps.emplace<ContainerType<T>>(); attribute.maps.emplace<ContainerType<T>>();
} }
ComplexKeyHashedDictionary::Attribute ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) ComplexKeyHashedDictionary::Attribute
ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{ {
Attribute attr{type, {}, {}, {}}; Attribute attr{type, {}, {}, {}};
switch (type) switch (type)
{ {
case AttributeUnderlyingType::UInt8: createAttributeImpl<UInt8>(attr, null_value); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: createAttributeImpl<UInt16>(attr, null_value); break; createAttributeImpl<UInt8>(attr, null_value);
case AttributeUnderlyingType::UInt32: createAttributeImpl<UInt32>(attr, null_value); break; break;
case AttributeUnderlyingType::UInt64: createAttributeImpl<UInt64>(attr, null_value); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: createAttributeImpl<UInt128>(attr, null_value); break; createAttributeImpl<UInt16>(attr, null_value);
case AttributeUnderlyingType::Int8: createAttributeImpl<Int8>(attr, null_value); break; break;
case AttributeUnderlyingType::Int16: createAttributeImpl<Int16>(attr, null_value); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: createAttributeImpl<Int32>(attr, null_value); break; createAttributeImpl<UInt32>(attr, null_value);
case AttributeUnderlyingType::Int64: createAttributeImpl<Int64>(attr, null_value); break; break;
case AttributeUnderlyingType::Float32: createAttributeImpl<Float32>(attr, null_value); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: createAttributeImpl<Float64>(attr, null_value); break; createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::UInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::Int8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::Int16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::Int32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::Int64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::Float32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::Float64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal32: createAttributeImpl<Decimal32>(attr, null_value); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: createAttributeImpl<Decimal64>(attr, null_value); break; createAttributeImpl<Decimal32>(attr, null_value);
case AttributeUnderlyingType::Decimal128: createAttributeImpl<Decimal128>(attr, null_value); break; break;
case AttributeUnderlyingType::Decimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -462,15 +574,14 @@ ComplexKeyHashedDictionary::Attribute ComplexKeyHashedDictionary::createAttribut
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void ComplexKeyHashedDictionary::getItemsNumber( void ComplexKeyHashedDictionary::getItemsNumber(
const Attribute & attribute, const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
if (false) {} if (false)
{
}
#define DISPATCH(TYPE) \ #define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) getItemsImpl<TYPE, OutputType>( \
getItemsImpl<TYPE, OutputType>(attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default)); attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
DISPATCH(UInt8) DISPATCH(UInt8)
DISPATCH(UInt16) DISPATCH(UInt16)
DISPATCH(UInt32) DISPATCH(UInt32)
@ -486,16 +597,12 @@ void ComplexKeyHashedDictionary::getItemsNumber(
DISPATCH(Decimal64) DISPATCH(Decimal64)
DISPATCH(Decimal128) DISPATCH(Decimal128)
#undef DISPATCH #undef DISPATCH
else else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
} }
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void ComplexKeyHashedDictionary::getItemsImpl( void ComplexKeyHashedDictionary::getItemsImpl(
const Attribute & attribute, const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps); const auto & attr = std::get<ContainerType<AttributeType>>(attribute.maps);
@ -532,21 +639,35 @@ bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>()); case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>()); return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::UInt32: return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>()); case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt64: return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>()); return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::UInt128: return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>()); case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int8: return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>()); return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::Int16: return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>()); case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Int32: return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>()); return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::Int64: return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>()); case AttributeUnderlyingType::UInt128:
case AttributeUnderlyingType::Float32: return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>()); return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
case AttributeUnderlyingType::Float64: return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>()); case AttributeUnderlyingType::Int8:
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int16:
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int32:
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int64:
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Float32:
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::Float64:
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::Decimal32: return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>()); case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>()); return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
case AttributeUnderlyingType::Decimal128: return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>()); case AttributeUnderlyingType::Decimal64:
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
case AttributeUnderlyingType::Decimal128:
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -570,8 +691,7 @@ const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAtt
return attributes[it->second]; return attributes[it->second];
} }
StringRef ComplexKeyHashedDictionary::placeKeysInPool( StringRef ComplexKeyHashedDictionary::placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool)
const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool)
{ {
const auto keys_size = key_columns.size(); const auto keys_size = key_columns.size();
size_t sum_keys_size{}; size_t sum_keys_size{};
@ -623,22 +743,37 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: return getKeys<UInt8>(attribute); case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: return getKeys<UInt16>(attribute); return getKeys<UInt8>(attribute);
case AttributeUnderlyingType::UInt32: return getKeys<UInt32>(attribute); case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt64: return getKeys<UInt64>(attribute); return getKeys<UInt16>(attribute);
case AttributeUnderlyingType::UInt128: return getKeys<UInt128>(attribute); case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int8: return getKeys<Int8>(attribute); return getKeys<UInt32>(attribute);
case AttributeUnderlyingType::Int16: return getKeys<Int16>(attribute); case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Int32: return getKeys<Int32>(attribute); return getKeys<UInt64>(attribute);
case AttributeUnderlyingType::Int64: return getKeys<Int64>(attribute); case AttributeUnderlyingType::UInt128:
case AttributeUnderlyingType::Float32: return getKeys<Float32>(attribute); return getKeys<UInt128>(attribute);
case AttributeUnderlyingType::Float64: return getKeys<Float64>(attribute); case AttributeUnderlyingType::Int8:
case AttributeUnderlyingType::String: return getKeys<StringRef>(attribute); return getKeys<Int8>(attribute);
case AttributeUnderlyingType::Int16:
return getKeys<Int16>(attribute);
case AttributeUnderlyingType::Int32:
return getKeys<Int32>(attribute);
case AttributeUnderlyingType::Int64:
return getKeys<Int64>(attribute);
case AttributeUnderlyingType::Float32:
return getKeys<Float32>(attribute);
case AttributeUnderlyingType::Float64:
return getKeys<Float64>(attribute);
case AttributeUnderlyingType::String:
return getKeys<StringRef>(attribute);
case AttributeUnderlyingType::Decimal32: return getKeys<Decimal32>(attribute); case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: return getKeys<Decimal64>(attribute); return getKeys<Decimal32>(attribute);
case AttributeUnderlyingType::Decimal128: return getKeys<Decimal128>(attribute); case AttributeUnderlyingType::Decimal64:
return getKeys<Decimal64>(attribute);
case AttributeUnderlyingType::Decimal128:
return getKeys<Decimal128>(attribute);
} }
return {}; return {};
} }
@ -663,13 +798,12 @@ BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names
void registerDictionaryComplexKeyHashed(DictionaryFactory & factory) void registerDictionaryComplexKeyHashed(DictionaryFactory & factory)
{ {
auto create_layout = [=]( auto create_layout = [=](const std::string & name,
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
DictionarySourcePtr source_ptr DictionarySourcePtr source_ptr) -> DictionaryPtr
) -> DictionaryPtr { {
if (!dict_struct.key) if (!dict_struct.key)
throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS};

View File

@ -1,30 +1,33 @@
#pragma once #pragma once
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <common/StringRef.h>
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <ext/range.h>
#include <atomic> #include <atomic>
#include <memory> #include <memory>
#include <variant> #include <variant>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Common/HashTable/HashMap.h>
#include <common/StringRef.h>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace DB namespace DB
{ {
using BlockPtr = std::shared_ptr<Block>; using BlockPtr = std::shared_ptr<Block>;
class ComplexKeyHashedDictionary final : public IDictionaryBase class ComplexKeyHashedDictionary final : public IDictionaryBase
{ {
public: public:
ComplexKeyHashedDictionary( ComplexKeyHashedDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const std::string & name,
const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block = nullptr); const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
bool require_nonempty,
BlockPtr saved_block = nullptr);
ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other); ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other);
@ -56,10 +59,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; } const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
@ -71,7 +71,31 @@ public:
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE( \ void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const; ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -90,13 +114,19 @@ public:
#undef DECLARE #undef DECLARE
void getString( void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
ColumnString * out) const; const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE( \ void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, \
const PaddedPODArray<TYPE> & def, ResultArrayType<TYPE> & out) const; const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
DECLARE(UInt32) DECLARE(UInt32)
@ -114,57 +144,57 @@ public:
#undef DECLARE #undef DECLARE
void getString( void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const ColumnString * const def, ColumnString * const out) const; const Columns & key_columns,
const DataTypes & key_types,
#define DECLARE(TYPE)\ const String & def,
void get##TYPE(\ ColumnString * const out) const;
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\
const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
const String & def, ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const; void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private: private:
template <typename Value> using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>; template <typename Value>
using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
struct Attribute final struct Attribute final
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::variant< std::variant<
UInt8, UInt16, UInt32, UInt64, UInt8,
UInt16,
UInt32,
UInt64,
UInt128, UInt128,
Int8, Int16, Int32, Int64, Int8,
Decimal32, Decimal64, Decimal128, Int16,
Float32, Float64, Int32,
String> null_values; Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant< std::variant<
ContainerType<UInt8>, ContainerType<UInt16>, ContainerType<UInt32>, ContainerType<UInt64>, ContainerType<UInt8>,
ContainerType<UInt16>,
ContainerType<UInt32>,
ContainerType<UInt64>,
ContainerType<UInt128>, ContainerType<UInt128>,
ContainerType<Int8>, ContainerType<Int16>, ContainerType<Int32>, ContainerType<Int64>, ContainerType<Int8>,
ContainerType<Decimal32>, ContainerType<Decimal64>, ContainerType<Decimal128>, ContainerType<Int16>,
ContainerType<Float32>, ContainerType<Float64>, ContainerType<Int32>,
ContainerType<StringRef>> maps; ContainerType<Int64>,
ContainerType<Decimal32>,
ContainerType<Decimal64>,
ContainerType<Decimal128>,
ContainerType<Float32>,
ContainerType<Float64>,
ContainerType<StringRef>>
maps;
std::unique_ptr<Arena> string_arena; std::unique_ptr<Arena> string_arena;
}; };
@ -188,18 +218,12 @@ private:
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsNumber( void
const Attribute & attribute, getItemsNumber(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl( void
const Attribute & attribute, getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename T> template <typename T>
@ -209,8 +233,7 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const; const Attribute & getAttribute(const std::string & attribute_name) const;
static StringRef placeKeysInPool( static StringRef placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
template <typename T> template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const; void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;

View File

@ -1,22 +1,21 @@
#pragma once #pragma once
#include <Columns/ColumnVector.h> #include <memory>
#include <Columns/ColumnDecimal.h> #include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Core/Names.h>
#include <DataStreams/IProfilingBlockInputStream.h> #include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <common/logger_useful.h>
#include <ext/range.h>
#include "DictionaryBlockInputStreamBase.h" #include "DictionaryBlockInputStreamBase.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "IDictionary.h" #include "IDictionary.h"
#include <ext/range.h>
#include <common/logger_useful.h>
#include <Core/Names.h>
#include <memory>
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
@ -32,28 +31,30 @@ class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
public: public:
using DictionaryPtr = std::shared_ptr<DictionaryType const>; using DictionaryPtr = std::shared_ptr<DictionaryType const>;
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, DictionaryBlockInputStream(
PaddedPODArray<Key> && ids, const Names & column_names); std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, DictionaryBlockInputStream(
const std::vector<StringRef> & keys, const Names & column_names); std::shared_ptr<const IDictionaryBase> dictionary,
size_t max_block_size,
const std::vector<StringRef> & keys,
const Names & column_names);
using GetColumnsFunction = using GetColumnsFunction = std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute> & attributes)>;
std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute> & attributes)>;
// Used to separate key columns format for storage and view. // Used to separate key columns format for storage and view.
// Calls get_key_columns_function to get key column for dictionary get function call // Calls get_key_columns_function to get key column for dictionary get function call
// and get_view_columns_function to get key representation. // and get_view_columns_function to get key representation.
// Now used in trie dictionary, where columns are stored as ip and mask, and are shown as strings // Now used in trie dictionary, where columns are stored as ip and mask, and are shown as strings
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, DictionaryBlockInputStream(
const Columns & data_columns, const Names & column_names, std::shared_ptr<const IDictionaryBase> dictionary,
size_t max_block_size,
const Columns & data_columns,
const Names & column_names,
GetColumnsFunction && get_key_columns_function, GetColumnsFunction && get_key_columns_function,
GetColumnsFunction && get_view_columns_function); GetColumnsFunction && get_view_columns_function);
String getName() const override String getName() const override { return "Dictionary"; }
{
return "Dictionary";
}
protected: protected:
Block getBlock(size_t start, size_t size) const override; Block getBlock(size_t start, size_t size) const override;
@ -65,8 +66,8 @@ private:
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const; using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
template <typename Type> template <typename Type>
using DictionaryDecimalGetter = using DictionaryDecimalGetter
void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const; = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const; using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
@ -75,61 +76,103 @@ private:
using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const; using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
template <typename Type> template <typename Type>
using DecimalGetterByKey = using DecimalGetterByKey
void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const; = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const; using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
// call getXXX // call getXXX
// for single key dictionaries // for single key dictionaries
template <typename Type, typename Container> template <typename Type, typename Container>
void callGetter(DictionaryGetter<Type> getter, const PaddedPODArray<Key> & ids_to_fill, void callGetter(
const Columns & keys, const DataTypes & data_types, DictionaryGetter<Type> getter,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Type, typename Container> template <typename Type, typename Container>
void callGetter(DictionaryDecimalGetter<Type> getter, const PaddedPODArray<Key> & ids_to_fill, void callGetter(
const Columns & keys, const DataTypes & data_types, DictionaryDecimalGetter<Type> getter,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Container> template <typename Container>
void callGetter(DictionaryStringGetter getter, const PaddedPODArray<Key> & ids_to_fill, void callGetter(
const Columns & keys, const DataTypes & data_types, DictionaryStringGetter getter,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
// for complex key dictionaries // for complex key dictionaries
template <typename Type, typename Container> template <typename Type, typename Container>
void callGetter(GetterByKey<Type> getter, const PaddedPODArray<Key> & ids_to_fill, void callGetter(
const Columns & keys, const DataTypes & data_types, GetterByKey<Type> getter,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Type, typename Container> template <typename Type, typename Container>
void callGetter(DecimalGetterByKey<Type> getter, const PaddedPODArray<Key> & ids_to_fill, void callGetter(
const Columns & keys, const DataTypes & data_types, DecimalGetterByKey<Type> getter,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Container> template <typename Container>
void callGetter(StringGetterByKey getter, const PaddedPODArray<Key> & ids_to_fill, void callGetter(
const Columns & keys, const DataTypes & data_types, StringGetterByKey getter,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter> template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
Block fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, Block
const DataTypes & types, ColumnsWithTypeAndName && view) const; fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
template <typename AttributeType, typename Getter> template <typename AttributeType, typename Getter>
ColumnPtr getColumnFromAttribute(Getter getter, const PaddedPODArray<Key> & ids_to_fill, ColumnPtr getColumnFromAttribute(
const Columns & keys, const DataTypes & data_types, Getter getter,
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
template <typename Getter> template <typename Getter>
ColumnPtr getColumnFromStringAttribute(Getter getter, const PaddedPODArray<Key> & ids_to_fill, ColumnPtr getColumnFromStringAttribute(
const Columns & keys, const DataTypes & data_types, Getter getter,
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dictionary) const;
ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const; ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
void fillKeyColumns(const std::vector<StringRef> & keys, size_t start, size_t size, void fillKeyColumns(
const DictionaryStructure & dictionary_structure, ColumnsWithTypeAndName & columns) const; const std::vector<StringRef> & keys,
size_t start,
size_t size,
const DictionaryStructure & dictionary_structure,
ColumnsWithTypeAndName & columns) const;
DictionaryPtr dictionary; DictionaryPtr dictionary;
Names column_names; Names column_names;
@ -138,8 +181,7 @@ private:
Poco::Logger * logger; Poco::Logger * logger;
using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)( using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
const DataTypes & types, ColumnsWithTypeAndName && view) const;
FillBlockFunction fill_block_function; FillBlockFunction fill_block_function;
@ -160,27 +202,30 @@ private:
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream( DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, PaddedPODArray<Key> && ids, const Names & column_names)
PaddedPODArray<Key> && ids, const Names & column_names) : DictionaryBlockInputStreamBase(ids.size(), max_block_size)
: DictionaryBlockInputStreamBase(ids.size(), max_block_size), , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary))
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), , column_names(column_names)
column_names(column_names), ids(std::move(ids)), , ids(std::move(ids))
logger(&Poco::Logger::get("DictionaryBlockInputStream")), , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
fill_block_function( , fill_block_function(
&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>), &DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
key_type(DictionaryKeyType::Id) , key_type(DictionaryKeyType::Id)
{ {
} }
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream( DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, std::shared_ptr<const IDictionaryBase> dictionary,
const std::vector<StringRef> & keys, const Names & column_names) size_t max_block_size,
: DictionaryBlockInputStreamBase(keys.size(), max_block_size), const std::vector<StringRef> & keys,
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), column_names(column_names), const Names & column_names)
logger(&Poco::Logger::get("DictionaryBlockInputStream")), : DictionaryBlockInputStreamBase(keys.size(), max_block_size)
fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>), , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary))
key_type(DictionaryKeyType::ComplexKey) , column_names(column_names)
, logger(&Poco::Logger::get("DictionaryBlockInputStream"))
, fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
, key_type(DictionaryKeyType::ComplexKey)
{ {
const DictionaryStructure & dictionaty_structure = dictionary->getStructure(); const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns); fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
@ -188,17 +233,21 @@ DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream( DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size, std::shared_ptr<const IDictionaryBase> dictionary,
const Columns & data_columns, const Names & column_names, size_t max_block_size,
const Columns & data_columns,
const Names & column_names,
GetColumnsFunction && get_key_columns_function, GetColumnsFunction && get_key_columns_function,
GetColumnsFunction && get_view_columns_function) GetColumnsFunction && get_view_columns_function)
: DictionaryBlockInputStreamBase(data_columns.front()->size(), max_block_size), : DictionaryBlockInputStreamBase(data_columns.front()->size(), max_block_size)
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), column_names(column_names), , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary))
logger(&Poco::Logger::get("DictionaryBlockInputStream")), , column_names(column_names)
fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>), , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
data_columns(data_columns), , fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
get_key_columns_function(get_key_columns_function), get_view_columns_function(get_view_columns_function), , data_columns(data_columns)
key_type(DictionaryKeyType::Callback) , get_key_columns_function(get_key_columns_function)
, get_view_columns_function(get_view_columns_function)
, key_type(DictionaryKeyType::Callback)
{ {
} }
@ -256,9 +305,13 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, si
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename Type, typename Container> template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter( void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryGetter<Type> getter, const PaddedPODArray<Key> & ids_to_fill, DictionaryGetter<Type> getter,
const Columns & /*keys*/, const DataTypes & /*data_types*/, const PaddedPODArray<Key> & ids_to_fill,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
(dict.*getter)(attribute.name, ids_to_fill, container); (dict.*getter)(attribute.name, ids_to_fill, container);
} }
@ -266,9 +319,13 @@ void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename Type, typename Container> template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter( void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryDecimalGetter<Type> getter, const PaddedPODArray<Key> & ids_to_fill, DictionaryDecimalGetter<Type> getter,
const Columns & /*keys*/, const DataTypes & /*data_types*/, const PaddedPODArray<Key> & ids_to_fill,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
(dict.*getter)(attribute.name, ids_to_fill, container); (dict.*getter)(attribute.name, ids_to_fill, container);
} }
@ -276,9 +333,13 @@ void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename Container> template <typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter( void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DictionaryStringGetter getter, const PaddedPODArray<Key> & ids_to_fill, DictionaryStringGetter getter,
const Columns & /*keys*/, const DataTypes & /*data_types*/, const PaddedPODArray<Key> & ids_to_fill,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & /*keys*/,
const DataTypes & /*data_types*/,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
(dict.*getter)(attribute.name, ids_to_fill, container); (dict.*getter)(attribute.name, ids_to_fill, container);
} }
@ -286,9 +347,13 @@ void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename Type, typename Container> template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter( void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
GetterByKey<Type> getter, const PaddedPODArray<Key> & /*ids_to_fill*/, GetterByKey<Type> getter,
const Columns & keys, const DataTypes & data_types, const PaddedPODArray<Key> & /*ids_to_fill*/,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
(dict.*getter)(attribute.name, keys, data_types, container); (dict.*getter)(attribute.name, keys, data_types, container);
} }
@ -296,9 +361,13 @@ void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename Type, typename Container> template <typename Type, typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter( void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
DecimalGetterByKey<Type> getter, const PaddedPODArray<Key> & /*ids_to_fill*/, DecimalGetterByKey<Type> getter,
const Columns & keys, const DataTypes & data_types, const PaddedPODArray<Key> & /*ids_to_fill*/,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
(dict.*getter)(attribute.name, keys, data_types, container); (dict.*getter)(attribute.name, keys, data_types, container);
} }
@ -306,9 +375,13 @@ void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename Container> template <typename Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter( void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
StringGetterByKey getter, const PaddedPODArray<Key> & /*ids_to_fill*/, StringGetterByKey getter,
const Columns & keys, const DataTypes & data_types, const PaddedPODArray<Key> & /*ids_to_fill*/,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & keys,
const DataTypes & data_types,
Container & container,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
(dict.*getter)(attribute.name, keys, data_types, container); (dict.*getter)(attribute.name, keys, data_types, container);
} }
@ -346,8 +419,7 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
{ {
ColumnPtr column; ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \ #define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
column = getColumnFromAttribute<TYPE, Getter<TYPE>>( \ column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
switch (attribute.underlying_type) switch (attribute.underlying_type)
{ {
case AttributeUnderlyingType::UInt8: case AttributeUnderlyingType::UInt8:
@ -419,9 +491,12 @@ Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename AttributeType, typename Getter> template <typename AttributeType, typename Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute( ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
Getter getter, const PaddedPODArray<Key> & ids_to_fill, Getter getter,
const Columns & keys, const DataTypes & data_types, const PaddedPODArray<Key> & ids_to_fill,
const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
if constexpr (IsDecimalNumber<AttributeType>) if constexpr (IsDecimalNumber<AttributeType>)
{ {
@ -447,9 +522,12 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribut
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
template <typename Getter> template <typename Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute( ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
Getter getter, const PaddedPODArray<Key> & ids_to_fill, Getter getter,
const Columns & keys, const DataTypes & data_types, const PaddedPODArray<Key> & ids_to_fill,
const DictionaryAttribute & attribute, const DictionaryType & dict) const const Columns & keys,
const DataTypes & data_types,
const DictionaryAttribute & attribute,
const DictionaryType & dict) const
{ {
auto column_string = ColumnString::create(); auto column_string = ColumnString::create();
auto ptr = column_string.get(); auto ptr = column_string.get();
@ -471,8 +549,11 @@ ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(cons
template <typename DictionaryType, typename Key> template <typename DictionaryType, typename Key>
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns( void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
const std::vector<StringRef> & keys, size_t start, size_t size, const std::vector<StringRef> & keys,
const DictionaryStructure & dictionary_structure, ColumnsWithTypeAndName & res) const size_t start,
size_t size,
const DictionaryStructure & dictionary_structure,
ColumnsWithTypeAndName & res) const
{ {
MutableColumns columns; MutableColumns columns;
columns.reserve(dictionary_structure.key->size()); columns.reserve(dictionary_structure.key->size());
@ -489,7 +570,8 @@ void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
} }
for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i) for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i)
res.emplace_back(ColumnWithTypeAndName{ std::move(columns[i]), (*dictionary_structure.key)[i].type, (*dictionary_structure.key)[i].name }); res.emplace_back(
ColumnWithTypeAndName{std::move(columns[i]), (*dictionary_structure.key)[i].type, (*dictionary_structure.key)[i].name});
} }
} }

View File

@ -2,7 +2,6 @@
namespace DB namespace DB
{ {
DictionaryBlockInputStreamBase::DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size) DictionaryBlockInputStreamBase::DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size)
: rows_count(rows_count), max_block_size(max_block_size) : rows_count(rows_count), max_block_size(max_block_size)
{ {

View File

@ -4,7 +4,6 @@
namespace DB namespace DB
{ {
class DictionaryBlockInputStreamBase : public IProfilingBlockInputStream class DictionaryBlockInputStreamBase : public IProfilingBlockInputStream
{ {
protected: protected:

View File

@ -1,16 +1,15 @@
#include "DictionarySourceHelpers.h" #include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h"
#include <Core/ColumnWithTypeAndName.h>
#include <Core/Block.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h> #include <Core/Block.h>
#include <Core/ColumnWithTypeAndName.h>
#include <DataStreams/IBlockOutputStream.h> #include <DataStreams/IBlockOutputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include "DictionaryStructure.h"
namespace DB namespace DB
{ {
/// For simple key /// For simple key
void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids) void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids)
{ {
@ -26,8 +25,11 @@ void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids)
} }
/// For composite key /// For composite key
void formatKeys(const DictionaryStructure & dict_struct, BlockOutputStreamPtr & out, void formatKeys(
const Columns & key_columns, const std::vector<size_t> & requested_rows) const DictionaryStructure & dict_struct,
BlockOutputStreamPtr & out,
const Columns & key_columns,
const std::vector<size_t> & requested_rows)
{ {
Block block; Block block;
for (size_t i = 0, size = key_columns.size(); i < size; ++i) for (size_t i = 0, size = key_columns.size(); i < size; ++i)

View File

@ -1,13 +1,12 @@
#pragma once #pragma once
#include <vector> #include <vector>
#include <common/Types.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <common/Types.h>
namespace DB namespace DB
{ {
class IBlockOutputStream; class IBlockOutputStream;
using BlockOutputStreamPtr = std::shared_ptr<IBlockOutputStream>; using BlockOutputStreamPtr = std::shared_ptr<IBlockOutputStream>;
@ -19,7 +18,10 @@ struct DictionaryStructure;
void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids); void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids);
/// For composite key /// For composite key
void formatKeys(const DictionaryStructure & dict_struct, BlockOutputStreamPtr & out, void formatKeys(
const Columns & key_columns, const std::vector<size_t> & requested_rows); const DictionaryStructure & dict_struct,
BlockOutputStreamPtr & out,
const Columns & key_columns,
const std::vector<size_t> & requested_rows);
} }

View File

@ -1,20 +1,19 @@
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include <Formats/FormatSettings.h> #include <Columns/IColumn.h>
#include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <Columns/IColumn.h> #include <Formats/FormatSettings.h>
#include <Common/StringUtils/StringUtils.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/StringUtils/StringUtils.h>
#include <ext/range.h>
#include <numeric> #include <numeric>
#include <unordered_set>
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <ext/range.h>
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNKNOWN_TYPE; extern const int UNKNOWN_TYPE;
@ -26,9 +25,7 @@ namespace ErrorCodes
namespace namespace
{ {
DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute( DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type)
const std::string & config_prefix,
const std::string& default_type)
{ {
const auto name = config.getString(config_prefix + ".name", ""); const auto name = config.getString(config_prefix + ".name", "");
const auto expression = config.getString(config_prefix + ".expression", ""); const auto expression = config.getString(config_prefix + ".expression", "");
@ -123,21 +120,36 @@ std::string toString(const AttributeUnderlyingType type)
{ {
switch (type) switch (type)
{ {
case AttributeUnderlyingType::UInt8: return "UInt8"; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: return "UInt16"; return "UInt8";
case AttributeUnderlyingType::UInt32: return "UInt32"; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt64: return "UInt64"; return "UInt16";
case AttributeUnderlyingType::UInt128: return "UUID"; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int8: return "Int8"; return "UInt32";
case AttributeUnderlyingType::Int16: return "Int16"; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Int32: return "Int32"; return "UInt64";
case AttributeUnderlyingType::Int64: return "Int64"; case AttributeUnderlyingType::UInt128:
case AttributeUnderlyingType::Float32: return "Float32"; return "UUID";
case AttributeUnderlyingType::Float64: return "Float64"; case AttributeUnderlyingType::Int8:
case AttributeUnderlyingType::Decimal32: return "Decimal32"; return "Int8";
case AttributeUnderlyingType::Decimal64: return "Decimal64"; case AttributeUnderlyingType::Int16:
case AttributeUnderlyingType::Decimal128: return "Decimal128"; return "Int16";
case AttributeUnderlyingType::String: return "String"; case AttributeUnderlyingType::Int32:
return "Int32";
case AttributeUnderlyingType::Int64:
return "Int64";
case AttributeUnderlyingType::Float32:
return "Float32";
case AttributeUnderlyingType::Float64:
return "Float64";
case AttributeUnderlyingType::Decimal32:
return "Decimal32";
case AttributeUnderlyingType::Decimal64:
return "Decimal64";
case AttributeUnderlyingType::Decimal128:
return "Decimal128";
case AttributeUnderlyingType::String:
return "String";
} }
throw Exception{"Unknown attribute_type " + toString(static_cast<int>(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; throw Exception{"Unknown attribute_type " + toString(static_cast<int>(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND};
@ -145,8 +157,7 @@ std::string toString(const AttributeUnderlyingType type)
DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
: name{config.getString(config_prefix + ".name", "")}, : name{config.getString(config_prefix + ".name", "")}, expression{config.getString(config_prefix + ".expression", "")}
expression{config.getString(config_prefix + ".expression", "")}
{ {
if (name.empty() && !expression.empty()) if (name.empty() && !expression.empty())
throw Exception{"Element " + config_prefix + ".name is empty", ErrorCodes::BAD_ARGUMENTS}; throw Exception{"Element " + config_prefix + ".name is empty", ErrorCodes::BAD_ARGUMENTS};
@ -186,14 +197,18 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
if (range_min.has_value() != range_max.has_value()) if (range_min.has_value() != range_max.has_value())
{ {
throw Exception{"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.", ErrorCodes::BAD_ARGUMENTS}; throw Exception{"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.",
ErrorCodes::BAD_ARGUMENTS};
} }
if (range_min && range_max && !range_min->type->equals(*range_max->type)) if (range_min && range_max && !range_min->type->equals(*range_max->type))
{ {
throw Exception{"Dictionary structure 'range_min' and 'range_max' should have same type, " throw Exception{"Dictionary structure 'range_min' and 'range_max' should have same type, "
"'range_min' type: " + range_min->type->getName() + ", " "'range_min' type: "
"'range_max' type: " + range_max->type->getName(), + range_min->type->getName()
+ ", "
"'range_max' type: "
+ range_max->type->getName(),
ErrorCodes::BAD_ARGUMENTS}; ErrorCodes::BAD_ARGUMENTS};
} }
@ -201,13 +216,12 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
{ {
if (!range_min->type->isValueRepresentedByInteger()) if (!range_min->type->isValueRepresentedByInteger())
throw Exception{"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum." throw Exception{"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
" Actual 'range_min' and 'range_max' type is " + range_min->type->getName(), " Actual 'range_min' and 'range_max' type is "
+ range_min->type->getName(),
ErrorCodes::BAD_ARGUMENTS}; ErrorCodes::BAD_ARGUMENTS};
} }
if (!id->expression.empty() || if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))
(range_min && !range_min->expression.empty()) ||
(range_max && !range_max->expression.empty()))
has_expressions = true; has_expressions = true;
} }
@ -228,8 +242,9 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
const auto & actual_type = key_types[i]->getName(); const auto & actual_type = key_types[i]->getName();
if (expected_type != actual_type) if (expected_type != actual_type)
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
", found " + actual_type, ErrorCodes::TYPE_MISMATCH}; + actual_type,
ErrorCodes::TYPE_MISMATCH};
} }
} }
@ -274,15 +289,17 @@ bool DictionaryStructure::isKeySizeFixed() const
size_t DictionaryStructure::getKeySize() const size_t DictionaryStructure::getKeySize() const
{ {
return std::accumulate(std::begin(*key), std::end(*key), size_t{}, return std::accumulate(std::begin(*key), std::end(*key), size_t{}, [](const auto running_size, const auto & key_i)
[] (const auto running_size, const auto & key_i) {return running_size + key_i.type->getSizeOfValueInMemory(); }); {
return running_size + key_i.type->getSizeOfValueInMemory();
});
} }
static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys) static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
{ {
static const std::unordered_set<std::string> valid_keys = static const std::unordered_set<std::string> valid_keys
{ "name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id" }; = {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"};
for (const auto & key : keys) for (const auto & key : keys)
{ {
@ -293,8 +310,10 @@ static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & k
std::vector<DictionaryAttribute> DictionaryStructure::getAttributes( std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const Poco::Util::AbstractConfiguration & config,
const bool hierarchy_allowed, const bool allow_null_values) const std::string & config_prefix,
const bool hierarchy_allowed,
const bool allow_null_values)
{ {
Poco::Util::AbstractConfiguration::Keys config_elems; Poco::Util::AbstractConfiguration::Keys config_elems;
config.keys(config_prefix, config_elems); config.keys(config_prefix, config_elems);
@ -361,9 +380,8 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
has_hierarchy = has_hierarchy || hierarchical; has_hierarchy = has_hierarchy || hierarchical;
res_attributes.emplace_back(DictionaryAttribute{ res_attributes.emplace_back(
name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
});
} }
return res_attributes; return res_attributes;

View File

@ -5,15 +5,14 @@
#include <Interpreters/IExternalLoadable.h> #include <Interpreters/IExternalLoadable.h>
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
#include <vector>
#include <string>
#include <map> #include <map>
#include <optional> #include <optional>
#include <string>
#include <vector>
namespace DB namespace DB
{ {
enum class AttributeUnderlyingType enum class AttributeUnderlyingType
{ {
UInt8, UInt8,
@ -104,8 +103,10 @@ struct DictionaryStructure final
private: private:
std::vector<DictionaryAttribute> getAttributes( std::vector<DictionaryAttribute> getAttributes(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const Poco::Util::AbstractConfiguration & config,
const bool hierarchy_allowed = true, const bool allow_null_values = true); const std::string & config_prefix,
const bool hierarchy_allowed = true,
const bool allow_null_values = true);
}; };
} }

View File

@ -4,8 +4,7 @@
#include "GeodataProviders/HierarchiesProvider.h" #include "GeodataProviders/HierarchiesProvider.h"
#include "GeodataProviders/NamesProvider.h" #include "GeodataProviders/NamesProvider.h"
std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarchies( std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config)
const Poco::Util::AbstractConfiguration & config)
{ {
static constexpr auto config_key = "path_to_regions_hierarchy_file"; static constexpr auto config_key = "path_to_regions_hierarchy_file";
@ -17,8 +16,7 @@ std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarch
return std::make_unique<RegionsHierarchies>(std::move(data_provider)); return std::make_unique<RegionsHierarchies>(std::move(data_provider));
} }
std::unique_ptr<RegionsNames> GeoDictionariesLoader::reloadRegionsNames( std::unique_ptr<RegionsNames> GeoDictionariesLoader::reloadRegionsNames(const Poco::Util::AbstractConfiguration & config)
const Poco::Util::AbstractConfiguration & config)
{ {
static constexpr auto config_key = "path_to_regions_names_files"; static constexpr auto config_key = "path_to_regions_names_files";

View File

@ -7,9 +7,7 @@
class GeoDictionariesLoader : public IGeoDictionariesLoader class GeoDictionariesLoader : public IGeoDictionariesLoader
{ {
public: public:
std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies( std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config) override;
const Poco::Util::AbstractConfiguration & config) override;
std::unique_ptr<RegionsNames> reloadRegionsNames( std::unique_ptr<RegionsNames> reloadRegionsNames(const Poco::Util::AbstractConfiguration & config) override;
const Poco::Util::AbstractConfiguration & config) override;
}; };

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "Types.h"
#include <string> #include <string>
#include "Types.h"
struct RegionEntry struct RegionEntry
{ {
@ -17,4 +17,3 @@ struct RegionNameEntry
RegionID id; RegionID id;
std::string name; std::string name;
}; };

View File

@ -1,10 +1,10 @@
#include "HierarchiesProvider.h" #include "HierarchiesProvider.h"
#include "HierarchyFormatReader.h"
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <Poco/DirectoryIterator.h> #include <Poco/DirectoryIterator.h>
#include <Poco/Exception.h>
#include <Poco/Util/Application.h>
#include "HierarchyFormatReader.h"
bool RegionsHierarchyDataSource::isModified() const bool RegionsHierarchyDataSource::isModified() const
@ -20,8 +20,7 @@ IRegionsHierarchyReaderPtr RegionsHierarchyDataSource::createReader()
} }
RegionsHierarchiesDataProvider::RegionsHierarchiesDataProvider(const std::string & path) RegionsHierarchiesDataProvider::RegionsHierarchiesDataProvider(const std::string & path) : path(path)
: path(path)
{ {
discoverFilesWithCustomHierarchies(); discoverFilesWithCustomHierarchies();
} }
@ -37,9 +36,8 @@ void RegionsHierarchiesDataProvider::discoverFilesWithCustomHierarchies()
{ {
std::string candidate_basename = dir_it.path().getBaseName(); std::string candidate_basename = dir_it.path().getBaseName();
if ((0 == candidate_basename.compare(0, basename.size(), basename)) && if ((0 == candidate_basename.compare(0, basename.size(), basename)) && (candidate_basename.size() > basename.size() + 1)
(candidate_basename.size() > basename.size() + 1) && && (candidate_basename[basename.size()] == '_'))
(candidate_basename[basename.size()] == '_'))
{ {
const std::string suffix = candidate_basename.substr(basename.size() + 1); const std::string suffix = candidate_basename.substr(basename.size() + 1);
hierarchy_files.emplace(suffix, dir_it->path()); hierarchy_files.emplace(suffix, dir_it->path());

View File

@ -2,23 +2,19 @@
#include "IHierarchiesProvider.h" #include "IHierarchiesProvider.h"
#include <Common/FileUpdatesTracker.h>
#include <unordered_map> #include <unordered_map>
#include <Common/FileUpdatesTracker.h>
// Represents local file with regions hierarchy dump // Represents local file with regions hierarchy dump
class RegionsHierarchyDataSource class RegionsHierarchyDataSource : public IRegionsHierarchyDataSource
: public IRegionsHierarchyDataSource
{ {
private: private:
std::string path; std::string path;
FileUpdatesTracker updates_tracker; FileUpdatesTracker updates_tracker;
public: public:
RegionsHierarchyDataSource(const std::string & path_) RegionsHierarchyDataSource(const std::string & path_) : path(path_), updates_tracker(path_) {}
: path(path_)
, updates_tracker(path_)
{}
bool isModified() const override; bool isModified() const override;
@ -27,8 +23,7 @@ public:
// Provides access to directory with multiple data source files: one file per regions hierarchy // Provides access to directory with multiple data source files: one file per regions hierarchy
class RegionsHierarchiesDataProvider class RegionsHierarchiesDataProvider : public IRegionsHierarchiesDataProvider
: public IRegionsHierarchiesDataProvider
{ {
private: private:
// path to file with default regions hierarchy // path to file with default regions hierarchy
@ -55,4 +50,3 @@ public:
private: private:
void discoverFilesWithCustomHierarchies(); void discoverFilesWithCustomHierarchies();
}; };

View File

@ -30,8 +30,7 @@ bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
++input->position(); ++input->position();
UInt64 population_big = 0; UInt64 population_big = 0;
DB::readIntText(population_big, *input); DB::readIntText(population_big, *input);
population = population_big > std::numeric_limits<RegionPopulation>::max() population = population_big > std::numeric_limits<RegionPopulation>::max() ? std::numeric_limits<RegionPopulation>::max()
? std::numeric_limits<RegionPopulation>::max()
: population_big; : population_big;
} }
DB::assertChar('\n', *input); DB::assertChar('\n', *input);

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "IHierarchiesProvider.h"
#include <IO/ReadBuffer.h> #include <IO/ReadBuffer.h>
#include "IHierarchiesProvider.h"
// Reads regions hierarchy in geoexport format // Reads regions hierarchy in geoexport format
@ -11,10 +11,7 @@ private:
DB::ReadBufferPtr input; DB::ReadBufferPtr input;
public: public:
RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {}
: input(std::move(input_))
{}
bool readNext(RegionEntry & entry) override; bool readNext(RegionEntry & entry) override;
}; };

View File

@ -1,9 +1,9 @@
#pragma once #pragma once
#include "Entries.h"
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "Entries.h"
// Iterates over all regions in data source // Iterates over all regions in data source
@ -46,4 +46,3 @@ public:
}; };
using IRegionsHierarchiesDataProviderPtr = std::shared_ptr<IRegionsHierarchiesDataProvider>; using IRegionsHierarchiesDataProviderPtr = std::shared_ptr<IRegionsHierarchiesDataProvider>;

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "Entries.h"
#include <memory> #include <memory>
#include "Entries.h"
// Iterates over all name entries in data source // Iterates over all name entries in data source
@ -42,11 +42,9 @@ using ILanguageRegionsNamesDataSourcePtr = std::unique_ptr<ILanguageRegionsNames
class IRegionsNamesDataProvider class IRegionsNamesDataProvider
{ {
public: public:
virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource( virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const = 0;
const std::string & language) const = 0;
virtual ~IRegionsNamesDataProvider() {} virtual ~IRegionsNamesDataProvider() {}
}; };
using IRegionsNamesDataProviderPtr = std::unique_ptr<IRegionsNamesDataProvider>; using IRegionsNamesDataProviderPtr = std::unique_ptr<IRegionsNamesDataProvider>;

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "INamesProvider.h"
#include <IO/ReadBuffer.h> #include <IO/ReadBuffer.h>
#include "INamesProvider.h"
// Reads regions names list in geoexport format // Reads regions names list in geoexport format
@ -11,9 +11,7 @@ private:
DB::ReadBufferPtr input; DB::ReadBufferPtr input;
public: public:
LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {}
: input(std::move(input_))
{}
bool readNext(RegionNameEntry & entry) override; bool readNext(RegionNameEntry & entry) override;
}; };

View File

@ -1,7 +1,7 @@
#include "NamesProvider.h" #include "NamesProvider.h"
#include "NamesFormatReader.h"
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include "NamesFormatReader.h"
bool LanguageRegionsNamesDataSource::isModified() const bool LanguageRegionsNamesDataSource::isModified() const
@ -32,12 +32,11 @@ std::string LanguageRegionsNamesDataSource::getSourceName() const
} }
RegionsNamesDataProvider::RegionsNamesDataProvider(const std::string & directory_) RegionsNamesDataProvider::RegionsNamesDataProvider(const std::string & directory_) : directory(directory_)
: directory(directory_) {
{} }
ILanguageRegionsNamesDataSourcePtr RegionsNamesDataProvider::getLanguageRegionsNamesSource( ILanguageRegionsNamesDataSourcePtr RegionsNamesDataProvider::getLanguageRegionsNamesSource(const std::string & language) const
const std::string & language) const
{ {
const auto data_file = getDataFilePath(language); const auto data_file = getDataFilePath(language);
return std::make_unique<LanguageRegionsNamesDataSource>(data_file, language); return std::make_unique<LanguageRegionsNamesDataSource>(data_file, language);

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "INamesProvider.h"
#include <Common/FileUpdatesTracker.h> #include <Common/FileUpdatesTracker.h>
#include "INamesProvider.h"
// Represents local file with list of regions ids / names // Represents local file with list of regions ids / names
@ -14,10 +14,9 @@ private:
public: public:
LanguageRegionsNamesDataSource(const std::string & path_, const std::string & language_) LanguageRegionsNamesDataSource(const std::string & path_, const std::string & language_)
: path(path_) : path(path_), updates_tracker(path_), language(language_)
, updates_tracker(path_) {
, language(language_) }
{}
bool isModified() const override; bool isModified() const override;
@ -42,8 +41,7 @@ private:
public: public:
RegionsNamesDataProvider(const std::string & directory_); RegionsNamesDataProvider(const std::string & directory_);
ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource( ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const override;
const std::string & language) const override;
private: private:
std::string getDataFilePath(const std::string & language) const; std::string getDataFilePath(const std::string & language) const;

View File

@ -1,8 +1,8 @@
#pragma once #pragma once
#include <memory>
#include "RegionsHierarchies.h" #include "RegionsHierarchies.h"
#include "RegionsNames.h" #include "RegionsNames.h"
#include <memory>
namespace Poco namespace Poco
{ {
@ -20,11 +20,9 @@ namespace Poco
class IGeoDictionariesLoader class IGeoDictionariesLoader
{ {
public: public:
virtual std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies( virtual std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config) = 0;
const Poco::Util::AbstractConfiguration & config) = 0;
virtual std::unique_ptr<RegionsNames> reloadRegionsNames( virtual std::unique_ptr<RegionsNames> reloadRegionsNames(const Poco::Util::AbstractConfiguration & config) = 0;
const Poco::Util::AbstractConfiguration & config) = 0;
virtual ~IGeoDictionariesLoader() {} virtual ~IGeoDictionariesLoader() {}
}; };

View File

@ -1,7 +1,7 @@
#include "RegionsHierarchies.h" #include "RegionsHierarchies.h"
#include <common/logger_useful.h>
#include <Poco/DirectoryIterator.h> #include <Poco/DirectoryIterator.h>
#include <common/logger_useful.h>
RegionsHierarchies::RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider) RegionsHierarchies::RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider)

View File

@ -1,9 +1,9 @@
#pragma once #pragma once
#include "RegionsHierarchy.h"
#include "GeodataProviders/IHierarchiesProvider.h"
#include <Poco/Exception.h>
#include <unordered_map> #include <unordered_map>
#include <Poco/Exception.h>
#include "GeodataProviders/IHierarchiesProvider.h"
#include "RegionsHierarchy.h"
/** Contains several hierarchies of regions. /** Contains several hierarchies of regions.

View File

@ -1,11 +1,11 @@
#include "RegionsHierarchy.h" #include "RegionsHierarchy.h"
#include "GeodataProviders/IHierarchiesProvider.h" #include <IO/WriteHelpers.h>
#include <Poco/Util/Application.h>
#include <Poco/Exception.h> #include <Poco/Exception.h>
#include <Poco/Util/Application.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <ext/singleton.h> #include <ext/singleton.h>
#include <IO/WriteHelpers.h> #include "GeodataProviders/IHierarchiesProvider.h"
namespace DB namespace DB
@ -17,8 +17,7 @@ namespace DB
} }
RegionsHierarchy::RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_) RegionsHierarchy::RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_) : data_source(data_source_)
: data_source(data_source_)
{ {
} }
@ -56,7 +55,8 @@ void RegionsHierarchy::reload()
if (region_entry.id > max_region_id) if (region_entry.id > max_region_id)
{ {
if (region_entry.id > max_size) if (region_entry.id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(region_entry.id) + ", should be not more than " + DB::toString(max_size), throw DB::Exception(
"Region id is too large: " + DB::toString(region_entry.id) + ", should be not more than " + DB::toString(max_size),
DB::ErrorCodes::INCORRECT_DATA); DB::ErrorCodes::INCORRECT_DATA);
max_region_id = region_entry.id; max_region_id = region_entry.id;
@ -113,14 +113,16 @@ void RegionsHierarchy::reload()
++depth; ++depth;
if (depth == std::numeric_limits<RegionDepth>::max()) if (depth == std::numeric_limits<RegionDepth>::max())
throw Poco::Exception("Logical error in regions hierarchy: region " + DB::toString(current) + " possible is inside infinite loop"); throw Poco::Exception(
"Logical error in regions hierarchy: region " + DB::toString(current) + " possible is inside infinite loop");
current = new_parents[current]; current = new_parents[current];
if (current == 0) if (current == 0)
break; break;
if (current > max_region_id) if (current > max_region_id)
throw Poco::Exception("Logical error in regions hierarchy: region " + DB::toString(current) + " (specified as parent) doesn't exist"); throw Poco::Exception(
"Logical error in regions hierarchy: region " + DB::toString(current) + " (specified as parent) doesn't exist");
if (types[current] == RegionType::City) if (types[current] == RegionType::City)
new_city[i] = current; new_city[i] = current;

View File

@ -1,9 +1,9 @@
#pragma once #pragma once
#include "GeodataProviders/IHierarchiesProvider.h"
#include <vector> #include <vector>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
#include <common/Types.h> #include <common/Types.h>
#include "GeodataProviders/IHierarchiesProvider.h"
class IRegionsHierarchyDataProvider; class IRegionsHierarchyDataProvider;

View File

@ -1,10 +1,10 @@
#include "RegionsNames.h" #include "RegionsNames.h"
#include "GeodataProviders/INamesProvider.h"
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <common/logger_useful.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Poco/Exception.h>
#include <Poco/Util/Application.h>
#include <common/logger_useful.h>
#include "GeodataProviders/INamesProvider.h"
namespace DB namespace DB
{ {
@ -84,7 +84,8 @@ void RegionsNames::reload()
max_region_id = name_entry.id; max_region_id = name_entry.id;
if (name_entry.id > max_size) if (name_entry.id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(name_entry.id) + ", should be not more than " + DB::toString(max_size), throw DB::Exception(
"Region id is too large: " + DB::toString(name_entry.id) + ", should be not more than " + DB::toString(max_size),
DB::ErrorCodes::INCORRECT_DATA); DB::ErrorCodes::INCORRECT_DATA);
} }

View File

@ -1,11 +1,11 @@
#pragma once #pragma once
#include "GeodataProviders/INamesProvider.h"
#include <Poco/Exception.h>
#include <common/Types.h>
#include <common/StringRef.h>
#include <string> #include <string>
#include <vector> #include <vector>
#include <Poco/Exception.h>
#include <common/StringRef.h>
#include <common/Types.h>
#include "GeodataProviders/INamesProvider.h"
/** A class that allows you to recognize by region id its text name in one of the supported languages: ru, en, ua, by, kz, tr. /** A class that allows you to recognize by region id its text name in one of the supported languages: ru, en, ua, by, kz, tr.
@ -41,19 +41,20 @@ private:
return res; return res;
} }
struct language_alias { const char * const name; const Language lang; }; struct language_alias
{
const char * const name;
const Language lang;
};
static const language_alias * getLanguageAliases() static const language_alias * getLanguageAliases()
{ {
static constexpr const language_alias language_aliases[] static constexpr const language_alias language_aliases[]{{"ru", Language::RU},
{
{ "ru", Language::RU },
{"en", Language::EN}, {"en", Language::EN},
{"ua", Language::UA}, {"ua", Language::UA},
{"uk", Language::UA}, {"uk", Language::UA},
{"by", Language::BY}, {"by", Language::BY},
{"kz", Language::KZ}, {"kz", Language::KZ},
{ "tr", Language::TR } {"tr", Language::TR}};
};
return language_aliases; return language_aliases;
} }

View File

@ -49,15 +49,9 @@ public:
} }
UInt8 OSToParent(UInt8 x) const UInt8 OSToParent(UInt8 x) const { return os_parent[x]; }
{
return os_parent[x];
}
UInt8 SEToParent(UInt8 x) const UInt8 SEToParent(UInt8 x) const { return se_parent[x]; }
{
return se_parent[x];
}
/// To the topmost ancestor. /// To the topmost ancestor.
@ -77,4 +71,6 @@ public:
}; };
class TechDataHierarchySingleton : public ext::singleton<TechDataHierarchySingleton>, public TechDataHierarchy {}; class TechDataHierarchySingleton : public ext::singleton<TechDataHierarchySingleton>, public TechDataHierarchy
{
};

View File

@ -1,26 +1,24 @@
#include "ExecutableDictionarySource.h" #include "ExecutableDictionarySource.h"
#include <thread>
#include <future> #include <future>
#include <Common/ShellCommand.h> #include <thread>
#include <Interpreters/Context.h>
#include <DataStreams/OwningBlockInputStream.h>
#include "DictionarySourceHelpers.h"
#include <DataStreams/IBlockOutputStream.h> #include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Interpreters/Context.h>
#include <Common/ShellCommand.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include "DictionarySourceFactory.h" #include "DictionarySourceFactory.h"
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
namespace DB namespace DB
{ {
static const size_t max_block_size = 8192; static const size_t max_block_size = 8192;
namespace namespace
{ {
/// Owns ShellCommand and calls wait for it. /// Owns ShellCommand and calls wait for it.
class ShellCommandOwningBlockInputStream : public OwningBlockInputStream<ShellCommand> class ShellCommandOwningBlockInputStream : public OwningBlockInputStream<ShellCommand>
{ {
@ -40,29 +38,32 @@ public:
} }
ExecutableDictionarySource::ExecutableDictionarySource(const DictionaryStructure & dict_struct_, ExecutableDictionarySource::ExecutableDictionarySource(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const DictionaryStructure & dict_struct_,
Block & sample_block, const Context & context) const Poco::Util::AbstractConfiguration & config,
: log(&Logger::get("ExecutableDictionarySource")), const std::string & config_prefix,
update_time{std::chrono::system_clock::from_time_t(0)}, Block & sample_block,
dict_struct{dict_struct_}, const Context & context)
command{config.getString(config_prefix + ".command")}, : log(&Logger::get("ExecutableDictionarySource"))
update_field{config.getString(config_prefix + ".update_field", "")}, , update_time{std::chrono::system_clock::from_time_t(0)}
format{config.getString(config_prefix + ".format")}, , dict_struct{dict_struct_}
sample_block{sample_block}, , command{config.getString(config_prefix + ".command")}
context(context) , update_field{config.getString(config_prefix + ".update_field", "")}
, format{config.getString(config_prefix + ".format")}
, sample_block{sample_block}
, context(context)
{ {
} }
ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionarySource & other) ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionarySource & other)
: log(&Logger::get("ExecutableDictionarySource")), : log(&Logger::get("ExecutableDictionarySource"))
update_time{other.update_time}, , update_time{other.update_time}
dict_struct{other.dict_struct}, , dict_struct{other.dict_struct}
command{other.command}, , command{other.command}
update_field{other.update_field}, , update_field{other.update_field}
format{other.format}, , format{other.format}
sample_block{other.sample_block}, , sample_block{other.sample_block}
context(other.context) , context(other.context)
{ {
} }
@ -110,7 +111,6 @@ BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
namespace namespace
{ {
/** A stream, that also runs and waits for background thread /** A stream, that also runs and waits for background thread
* (that will feed data into pipe to be read from the other side of the pipe). * (that will feed data into pipe to be read from the other side of the pipe).
*/ */
@ -118,10 +118,11 @@ class BlockInputStreamWithBackgroundThread final : public IProfilingBlockInputSt
{ {
public: public:
BlockInputStreamWithBackgroundThread( BlockInputStreamWithBackgroundThread(
const BlockInputStreamPtr & stream_, std::unique_ptr<ShellCommand> && command_, const BlockInputStreamPtr & stream_, std::unique_ptr<ShellCommand> && command_, std::packaged_task<void()> && task_)
std::packaged_task<void()> && task_) : stream{stream_}, command{std::move(command_)}, task(std::move(task_)), thread([this] {
: stream{stream_}, command{std::move(command_)}, task(std::move(task_)), task();
thread([this]{ task(); command->in.close(); }) command->in.close();
})
{ {
children.push_back(stream); children.push_back(stream);
} }
@ -180,15 +181,10 @@ BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector<UInt64
auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
return std::make_shared<BlockInputStreamWithBackgroundThread>( return std::make_shared<BlockInputStreamWithBackgroundThread>(
input_stream, std::move(process), std::packaged_task<void()>( input_stream, std::move(process), std::packaged_task<void()>([output_stream, &ids]() mutable { formatIDs(output_stream, ids); }));
[output_stream, &ids]() mutable
{
formatIDs(output_stream, ids);
}));
} }
BlockInputStreamPtr ExecutableDictionarySource::loadKeys( BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
const Columns & key_columns, const std::vector<size_t> & requested_rows)
{ {
LOG_TRACE(log, "loadKeys " << toString() << " size = " << requested_rows.size()); LOG_TRACE(log, "loadKeys " << toString() << " size = " << requested_rows.size());
auto process = ShellCommand::execute(command); auto process = ShellCommand::execute(command);
@ -197,8 +193,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadKeys(
auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
return std::make_shared<BlockInputStreamWithBackgroundThread>( return std::make_shared<BlockInputStreamWithBackgroundThread>(
input_stream, std::move(process), std::packaged_task<void()>( input_stream, std::move(process), std::packaged_task<void()>([output_stream, key_columns, &requested_rows, this]() mutable
[output_stream, key_columns, &requested_rows, this]() mutable
{ {
formatKeys(dict_struct, output_stream, key_columns, requested_rows); formatKeys(dict_struct, output_stream, key_columns, requested_rows);
})); }));

View File

@ -1,15 +1,17 @@
#pragma once #pragma once
#include "IDictionarySource.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "IDictionarySource.h"
namespace Poco { class Logger; } namespace Poco
{
class Logger;
}
namespace DB namespace DB
{ {
/// Allows loading dictionaries from executable /// Allows loading dictionaries from executable
class ExecutableDictionarySource final : public IDictionarySource class ExecutableDictionarySource final : public IDictionarySource
{ {
@ -29,8 +31,7 @@ public:
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override; BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys( BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
bool isModified() const override; bool isModified() const override;

View File

@ -1,16 +1,15 @@
#include <ext/range.h> #include "ExternalQueryBuilder.h"
#include <boost/range/join.hpp>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include "writeParenthesisedString.h" #include <boost/range/join.hpp>
#include <ext/range.h>
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "ExternalQueryBuilder.h" #include "writeParenthesisedString.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNSUPPORTED_METHOD; extern const int UNSUPPORTED_METHOD;
@ -238,10 +237,8 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector<UInt64>
} }
std::string ExternalQueryBuilder::composeLoadKeysQuery( std::string
const Columns & key_columns, ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method)
const std::vector<size_t> & requested_rows,
LoadKeysMethod method)
{ {
if (!dict_struct.key) if (!dict_struct.key)
throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD};

View File

@ -1,14 +1,13 @@
#pragma once #pragma once
#include <string> #include <string>
#include <Formats/FormatSettings.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Formats/FormatSettings.h>
#include <Parsers/IdentifierQuotingStyle.h> #include <Parsers/IdentifierQuotingStyle.h>
namespace DB namespace DB
{ {
struct DictionaryStructure; struct DictionaryStructure;
class WriteBuffer; class WriteBuffer;
@ -53,10 +52,7 @@ struct ExternalQueryBuilder
IN_WITH_TUPLES, IN_WITH_TUPLES,
}; };
std::string composeLoadKeysQuery( std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method);
const Columns & key_columns,
const std::vector<size_t> & requested_rows,
LoadKeysMethod method);
private: private:

View File

@ -1,17 +1,16 @@
#include <ext/range.h>
#include "ExternalResultDescription.h" #include "ExternalResultDescription.h"
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h> #include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypesNumber.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <ext/range.h>
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNKNOWN_TYPE; extern const int UNKNOWN_TYPE;

View File

@ -5,7 +5,6 @@
namespace DB namespace DB
{ {
/** Common part for implementation of MySQLBlockInputStream, MongoDBBlockInputStream and others. /** Common part for implementation of MySQLBlockInputStream, MongoDBBlockInputStream and others.
*/ */
struct ExternalResultDescription struct ExternalResultDescription

View File

@ -1,36 +1,38 @@
#include "FileDictionarySource.h" #include "FileDictionarySource.h"
#include <Interpreters/Context.h>
#include <DataStreams/OwningBlockInputStream.h> #include <DataStreams/OwningBlockInputStream.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <Interpreters/Context.h>
#include <Poco/File.h> #include <Poco/File.h>
#include "DictionarySourceFactory.h" #include "DictionarySourceFactory.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
namespace DB namespace DB
{ {
static const size_t max_block_size = 8192; static const size_t max_block_size = 8192;
FileDictionarySource::FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, FileDictionarySource::FileDictionarySource(
const Context & context) const std::string & filename, const std::string & format, Block & sample_block, const Context & context)
: filename{filename}, format{format}, sample_block{sample_block}, context(context) : filename{filename}, format{format}, sample_block{sample_block}, context(context)
{} {
}
FileDictionarySource::FileDictionarySource(const FileDictionarySource & other) FileDictionarySource::FileDictionarySource(const FileDictionarySource & other)
: filename{other.filename}, format{other.format}, : filename{other.filename}
sample_block{other.sample_block}, context(other.context), , format{other.format}
last_modification{other.last_modification} , sample_block{other.sample_block}
{} , context(other.context)
, last_modification{other.last_modification}
{
}
BlockInputStreamPtr FileDictionarySource::loadAll() BlockInputStreamPtr FileDictionarySource::loadAll()
{ {
auto in_ptr = std::make_unique<ReadBufferFromFile>(filename); auto in_ptr = std::make_unique<ReadBufferFromFile>(filename);
auto stream = context.getInputFormat( auto stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size);
format, *in_ptr, sample_block, max_block_size);
last_modification = getLastModification(); last_modification = getLastModification();
return std::make_shared<OwningBlockInputStream<ReadBuffer>>(stream, std::move(in_ptr)); return std::make_shared<OwningBlockInputStream<ReadBuffer>>(stream, std::move(in_ptr));

View File

@ -1,20 +1,18 @@
#pragma once #pragma once
#include "IDictionarySource.h"
#include <Poco/Timestamp.h> #include <Poco/Timestamp.h>
#include "IDictionarySource.h"
namespace DB namespace DB
{ {
class Context; class Context;
/// Allows loading dictionaries from a file with given format, does not support "random access" /// Allows loading dictionaries from a file with given format, does not support "random access"
class FileDictionarySource final : public IDictionarySource class FileDictionarySource final : public IDictionarySource
{ {
public: public:
FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, const Context & context);
const Context & context);
FileDictionarySource(const FileDictionarySource & other); FileDictionarySource(const FileDictionarySource & other);
@ -30,8 +28,7 @@ public:
throw Exception{"Method loadIds is unsupported for FileDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; throw Exception{"Method loadIds is unsupported for FileDictionarySource", ErrorCodes::NOT_IMPLEMENTED};
} }
BlockInputStreamPtr loadKeys( BlockInputStreamPtr loadKeys(const Columns & /*key_columns*/, const std::vector<size_t> & /*requested_rows*/) override
const Columns & /*key_columns*/, const std::vector<size_t> & /*requested_rows*/) override
{ {
throw Exception{"Method loadKeys is unsupported for FileDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; throw Exception{"Method loadKeys is unsupported for FileDictionarySource", ErrorCodes::NOT_IMPLEMENTED};
} }

View File

@ -1,11 +1,10 @@
#include "FlatDictionary.h" #include "FlatDictionary.h"
#include "DictionaryBlockInputStream.h"
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h" #include "DictionaryFactory.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -21,12 +20,20 @@ static const auto initial_array_size = 1024;
static const auto max_array_size = 500000; static const auto max_array_size = 500000;
FlatDictionary::FlatDictionary(const std::string & name, const DictionaryStructure & dict_struct, FlatDictionary::FlatDictionary(
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block) const std::string & name,
: name{name}, dict_struct(dict_struct), const DictionaryStructure & dict_struct,
source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), DictionarySourcePtr source_ptr,
require_nonempty(require_nonempty), const DictionaryLifetime dict_lifetime,
loaded_ids(initial_array_size, false), saved_block{std::move(saved_block)} bool require_nonempty,
BlockPtr saved_block)
: name{name}
, dict_struct(dict_struct)
, source_ptr{std::move(source_ptr)}
, dict_lifetime(dict_lifetime)
, require_nonempty(require_nonempty)
, loaded_ids(initial_array_size, false)
, saved_block{std::move(saved_block)}
{ {
createAttributes(); createAttributes();
@ -44,7 +51,8 @@ FlatDictionary::FlatDictionary(const std::string & name, const DictionaryStructu
} }
FlatDictionary::FlatDictionary(const FlatDictionary & other) FlatDictionary::FlatDictionary(const FlatDictionary & other)
: FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block} : FlatDictionary{
other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block}
{ {
} }
@ -53,21 +61,26 @@ void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Ke
{ {
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values); const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
getItemsNumber<UInt64>(*hierarchical_attribute, ids, getItemsNumber<UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; }, [&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; }); [&](const size_t) { return null_value; });
} }
/// Allow to use single value in same way as array. /// Allow to use single value in same way as array.
static inline FlatDictionary::Key getAt(const PaddedPODArray<FlatDictionary::Key> & arr, const size_t idx) { return arr[idx]; } static inline FlatDictionary::Key getAt(const PaddedPODArray<FlatDictionary::Key> & arr, const size_t idx)
static inline FlatDictionary::Key getAt(const FlatDictionary::Key & value, const size_t) { return value; } {
return arr[idx];
}
static inline FlatDictionary::Key getAt(const FlatDictionary::Key & value, const size_t)
{
return value;
}
template <typename ChildType, typename AncestorType> template <typename ChildType, typename AncestorType>
void FlatDictionary::isInImpl( void FlatDictionary::isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
const ChildType & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values); const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
const auto & attr = std::get<ContainerType<Key>>(hierarchical_attribute->arrays); const auto & attr = std::get<ContainerType<Key>>(hierarchical_attribute->arrays);
@ -90,25 +103,17 @@ void FlatDictionary::isInImpl(
void FlatDictionary::isInVectorVector( void FlatDictionary::isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
const PaddedPODArray<Key> & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_ids, ancestor_ids, out); isInImpl(child_ids, ancestor_ids, out);
} }
void FlatDictionary::isInVectorConstant( void FlatDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
const PaddedPODArray<Key> & child_ids,
const Key ancestor_id,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_ids, ancestor_id, out); isInImpl(child_ids, ancestor_id, out);
} }
void FlatDictionary::isInConstantVector( void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
const Key child_id,
const PaddedPODArray<Key> & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_id, ancestor_ids, out); isInImpl(child_id, ancestor_ids, out);
} }
@ -119,13 +124,13 @@ void FlatDictionary::get##TYPE(const std::string & attribute_name, const PaddedP
{ \ { \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
const auto null_value = std::get<TYPE>(attribute.null_values); \ const auto null_value = std::get<TYPE>(attribute.null_values); \
\ \
getItemsNumber<TYPE>(attribute, ids,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
[&] (const size_t) { return null_value; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -147,27 +152,32 @@ void FlatDictionary::getString(const std::string & attribute_name, const PaddedP
{ {
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
const auto & null_value = std::get<StringRef>(attribute.null_values); const auto & null_value = std::get<StringRef>(attribute.null_values);
getItemsImpl<StringRef, StringRef>(attribute, ids, getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; }); [&](const size_t) { return null_value; });
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void FlatDictionary::get##TYPE( \ void FlatDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<TYPE> & def,\ const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \ ResultArrayType<TYPE> & out) const \
{ \ { \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, ids,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
[&] (const size_t row) { return def[row]; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -186,14 +196,16 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void FlatDictionary::getString( void FlatDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
ColumnString * const out) const
{ {
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, ids, getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); }); [&](const size_t row) { return def->getDataAt(row); });
} }
@ -204,11 +216,11 @@ void FlatDictionary::get##TYPE(\
{ \ { \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, ids,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
[&] (const size_t) { return def; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -227,14 +239,16 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void FlatDictionary::getString( void FlatDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
ColumnString * const out) const
{ {
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
FlatDictionary::getItemsImpl<StringRef, StringRef>(attribute, ids, FlatDictionary::getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; }); [&](const size_t) { return StringRef{def}; });
} }
@ -246,22 +260,52 @@ void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: has<UInt8>(attribute, ids, out); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: has<UInt16>(attribute, ids, out); break; has<UInt8>(attribute, ids, out);
case AttributeUnderlyingType::UInt32: has<UInt32>(attribute, ids, out); break; break;
case AttributeUnderlyingType::UInt64: has<UInt64>(attribute, ids, out); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: has<UInt128>(attribute, ids, out); break; has<UInt16>(attribute, ids, out);
case AttributeUnderlyingType::Int8: has<Int8>(attribute, ids, out); break; break;
case AttributeUnderlyingType::Int16: has<Int16>(attribute, ids, out); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: has<Int32>(attribute, ids, out); break; has<UInt32>(attribute, ids, out);
case AttributeUnderlyingType::Int64: has<Int64>(attribute, ids, out); break; break;
case AttributeUnderlyingType::Float32: has<Float32>(attribute, ids, out); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: has<Float64>(attribute, ids, out); break; has<UInt64>(attribute, ids, out);
case AttributeUnderlyingType::String: has<String>(attribute, ids, out); break; break;
case AttributeUnderlyingType::UInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::Float32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::Float64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::String:
has<String>(attribute, ids, out);
break;
case AttributeUnderlyingType::Decimal32: has<Decimal32>(attribute, ids, out); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: has<Decimal64>(attribute, ids, out); break; has<Decimal32>(attribute, ids, out);
case AttributeUnderlyingType::Decimal128: has<Decimal128>(attribute, ids, out); break; break;
case AttributeUnderlyingType::Decimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::Decimal128:
has<Decimal128>(attribute, ids, out);
break;
} }
} }
@ -409,21 +453,49 @@ void FlatDictionary::calculateBytesAllocated()
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: addAttributeSize<UInt8>(attribute); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: addAttributeSize<UInt16>(attribute); break; addAttributeSize<UInt8>(attribute);
case AttributeUnderlyingType::UInt32: addAttributeSize<UInt32>(attribute); break; break;
case AttributeUnderlyingType::UInt64: addAttributeSize<UInt64>(attribute); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: addAttributeSize<UInt128>(attribute); break; addAttributeSize<UInt16>(attribute);
case AttributeUnderlyingType::Int8: addAttributeSize<Int8>(attribute); break; break;
case AttributeUnderlyingType::Int16: addAttributeSize<Int16>(attribute); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: addAttributeSize<Int32>(attribute); break; addAttributeSize<UInt32>(attribute);
case AttributeUnderlyingType::Int64: addAttributeSize<Int64>(attribute); break; break;
case AttributeUnderlyingType::Float32: addAttributeSize<Float32>(attribute); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: addAttributeSize<Float64>(attribute); break; addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::UInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::Int8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::Int16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::Int32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::Int64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::Float32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::Float64:
addAttributeSize<Float64>(attribute);
break;
case AttributeUnderlyingType::Decimal32: addAttributeSize<Decimal32>(attribute); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: addAttributeSize<Decimal64>(attribute); break; addAttributeSize<Decimal32>(attribute);
case AttributeUnderlyingType::Decimal128: addAttributeSize<Decimal128>(attribute); break; break;
case AttributeUnderlyingType::Decimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::Decimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -462,22 +534,52 @@ FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const Attribut
switch (type) switch (type)
{ {
case AttributeUnderlyingType::UInt8: createAttributeImpl<UInt8>(attr, null_value); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: createAttributeImpl<UInt16>(attr, null_value); break; createAttributeImpl<UInt8>(attr, null_value);
case AttributeUnderlyingType::UInt32: createAttributeImpl<UInt32>(attr, null_value); break; break;
case AttributeUnderlyingType::UInt64: createAttributeImpl<UInt64>(attr, null_value); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: createAttributeImpl<UInt128>(attr, null_value); break; createAttributeImpl<UInt16>(attr, null_value);
case AttributeUnderlyingType::Int8: createAttributeImpl<Int8>(attr, null_value); break; break;
case AttributeUnderlyingType::Int16: createAttributeImpl<Int16>(attr, null_value); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: createAttributeImpl<Int32>(attr, null_value); break; createAttributeImpl<UInt32>(attr, null_value);
case AttributeUnderlyingType::Int64: createAttributeImpl<Int64>(attr, null_value); break; break;
case AttributeUnderlyingType::Float32: createAttributeImpl<Float32>(attr, null_value); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: createAttributeImpl<Float64>(attr, null_value); break; createAttributeImpl<UInt64>(attr, null_value);
case AttributeUnderlyingType::String: createAttributeImpl<String>(attr, null_value); break; break;
case AttributeUnderlyingType::UInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::Int8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::Int16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::Int32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::Int64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::Float32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::Float64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::String:
createAttributeImpl<String>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal32: createAttributeImpl<Decimal32>(attr, null_value); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: createAttributeImpl<Decimal64>(attr, null_value); break; createAttributeImpl<Decimal32>(attr, null_value);
case AttributeUnderlyingType::Decimal128: createAttributeImpl<Decimal128>(attr, null_value); break; break;
case AttributeUnderlyingType::Decimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
} }
return attr; return attr;
@ -486,12 +588,11 @@ FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const Attribut
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void FlatDictionary::getItemsNumber( void FlatDictionary::getItemsNumber(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
if (false) {} if (false)
{
}
#define DISPATCH(TYPE) \ #define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) \
getItemsImpl<TYPE, OutputType>(attribute, ids, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default)); getItemsImpl<TYPE, OutputType>(attribute, ids, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
@ -510,17 +611,13 @@ void FlatDictionary::getItemsNumber(
DISPATCH(Decimal64) DISPATCH(Decimal64)
DISPATCH(Decimal128) DISPATCH(Decimal128)
#undef DISPATCH #undef DISPATCH
else else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
} }
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void FlatDictionary::getItemsImpl( void FlatDictionary::getItemsImpl(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays); const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays);
const auto rows = ext::size(ids); const auto rows = ext::size(ids);
@ -572,22 +669,52 @@ void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, cons
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>()); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>()); break; setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::UInt32: setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>()); break; break;
case AttributeUnderlyingType::UInt64: setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>()); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>()); break; setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::Int8: setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>()); break; break;
case AttributeUnderlyingType::Int16: setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>()); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>()); break; setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::Int64: setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>()); break; break;
case AttributeUnderlyingType::Float32: setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>()); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>()); break; setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::String: setAttributeValueImpl<String>(attribute, id, value.get<String>()); break; break;
case AttributeUnderlyingType::UInt128:
setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
break;
case AttributeUnderlyingType::Int8:
setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Int16:
setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Int32:
setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Int64:
setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Float32:
setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::Float64:
setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::String:
setAttributeValueImpl<String>(attribute, id, value.get<String>());
break;
case AttributeUnderlyingType::Decimal32: setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal128>()); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal128>()); break; setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal128>());
case AttributeUnderlyingType::Decimal128: setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>()); break; break;
case AttributeUnderlyingType::Decimal64:
setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal128>());
break;
case AttributeUnderlyingType::Decimal128:
setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
break;
} }
} }
@ -636,14 +763,12 @@ BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_nam
void registerDictionaryFlat(DictionaryFactory & factory) void registerDictionaryFlat(DictionaryFactory & factory)
{ {
auto create_layout = [=]( auto create_layout = [=](const std::string & name,
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
DictionarySourcePtr source_ptr DictionarySourcePtr source_ptr) -> DictionaryPtr
) -> DictionaryPtr { {
if (dict_struct.key) if (dict_struct.key)
throw Exception{"'key' is not supported for dictionary of layout 'flat'", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{"'key' is not supported for dictionary of layout 'flat'", ErrorCodes::UNSUPPORTED_METHOD};
@ -655,12 +780,9 @@ void registerDictionaryFlat(DictionaryFactory & factory)
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
return std::make_unique<FlatDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); return std::make_unique<FlatDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
}; };
factory.registerLayout("flat", create_layout); factory.registerLayout("flat", create_layout);
} }
} }

View File

@ -1,28 +1,32 @@
#pragma once #pragma once
#include "IDictionary.h" #include <atomic>
#include "IDictionarySource.h" #include <variant>
#include "DictionaryStructure.h" #include <vector>
#include <Columns/ColumnDecimal.h> #include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Common/Arena.h> #include <Common/Arena.h>
#include <ext/range.h> #include <ext/range.h>
#include <ext/size.h> #include <ext/size.h>
#include <atomic> #include "DictionaryStructure.h"
#include <vector> #include "IDictionary.h"
#include <variant> #include "IDictionarySource.h"
namespace DB namespace DB
{ {
using BlockPtr = std::shared_ptr<Block>; using BlockPtr = std::shared_ptr<Block>;
class FlatDictionary final : public IDictionary class FlatDictionary final : public IDictionary
{ {
public: public:
FlatDictionary(const std::string & name, const DictionaryStructure & dict_struct, FlatDictionary(
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block = nullptr); const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
bool require_nonempty,
BlockPtr saved_block = nullptr);
FlatDictionary(const FlatDictionary & other); FlatDictionary(const FlatDictionary & other);
@ -52,10 +56,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; } const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
@ -66,7 +67,8 @@ public:
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override; void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; void isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override; void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
@ -95,7 +97,9 @@ public:
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE( \ void get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<TYPE> & def,\ const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const; ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -113,14 +117,12 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString( void
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
ColumnString * const out) const; const;
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE(\ void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def,\
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
DECLARE(UInt32) DECLARE(UInt32)
@ -137,34 +139,53 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString( void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def,
ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override; void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private: private:
template <typename Value> using ContainerType = PaddedPODArray<Value>; template <typename Value>
using ContainerType = PaddedPODArray<Value>;
struct Attribute final struct Attribute final
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::variant< std::variant<
UInt8, UInt16, UInt32, UInt64, UInt8,
UInt16,
UInt32,
UInt64,
UInt128, UInt128,
Int8, Int16, Int32, Int64, Int8,
Decimal32, Decimal64, Decimal128, Int16,
Float32, Float64, Int32,
StringRef> null_values; Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
StringRef>
null_values;
std::variant< std::variant<
ContainerType<UInt8>, ContainerType<UInt16>, ContainerType<UInt32>, ContainerType<UInt64>, ContainerType<UInt8>,
ContainerType<UInt16>,
ContainerType<UInt32>,
ContainerType<UInt64>,
ContainerType<UInt128>, ContainerType<UInt128>,
ContainerType<Int8>, ContainerType<Int16>, ContainerType<Int32>, ContainerType<Int64>, ContainerType<Int8>,
ContainerType<Decimal32>, ContainerType<Decimal64>, ContainerType<Decimal128>, ContainerType<Int16>,
ContainerType<Float32>, ContainerType<Float64>, ContainerType<Int32>,
ContainerType<StringRef>> arrays; ContainerType<Int64>,
ContainerType<Decimal32>,
ContainerType<Decimal64>,
ContainerType<Decimal128>,
ContainerType<Float32>,
ContainerType<Float64>,
ContainerType<StringRef>>
arrays;
std::unique_ptr<Arena> string_arena; std::unique_ptr<Arena> string_arena;
}; };
@ -185,17 +206,11 @@ private:
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsNumber( void getItemsNumber(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl( void getItemsImpl(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename T> template <typename T>
void resize(Attribute & attribute, const Key id); void resize(Attribute & attribute, const Key id);
@ -211,10 +226,7 @@ private:
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const; void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
template <typename ChildType, typename AncestorType> template <typename ChildType, typename AncestorType>
void isInImpl( void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
const ChildType & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const;
PaddedPODArray<Key> getIds() const; PaddedPODArray<Key> getIds() const;

View File

@ -1,49 +1,51 @@
#include "HTTPDictionarySource.h" #include "HTTPDictionarySource.h"
#include <Poco/Net/HTTPRequest.h>
#include <Interpreters/Context.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <DataStreams/IBlockOutputStream.h> #include <DataStreams/IBlockOutputStream.h>
#include <IO/WriteBufferFromOStream.h> #include <DataStreams/OwningBlockInputStream.h>
#include "DictionarySourceHelpers.h"
#include <common/logger_useful.h>
#include <IO/ConnectionTimeouts.h> #include <IO/ConnectionTimeouts.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/WriteBufferFromOStream.h>
#include <Interpreters/Context.h>
#include <Poco/Net/HTTPRequest.h>
#include <common/logger_useful.h>
#include "DictionarySourceFactory.h" #include "DictionarySourceFactory.h"
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
namespace DB namespace DB
{ {
static const size_t max_block_size = 8192; static const size_t max_block_size = 8192;
HTTPDictionarySource::HTTPDictionarySource(const DictionaryStructure & dict_struct_, HTTPDictionarySource::HTTPDictionarySource(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const DictionaryStructure & dict_struct_,
Block & sample_block, const Context & context) const Poco::Util::AbstractConfiguration & config,
: log(&Logger::get("HTTPDictionarySource")), const std::string & config_prefix,
update_time{std::chrono::system_clock::from_time_t(0)}, Block & sample_block,
dict_struct{dict_struct_}, const Context & context)
url{config.getString(config_prefix + ".url", "")}, : log(&Logger::get("HTTPDictionarySource"))
update_field{config.getString(config_prefix + ".update_field", "")}, , update_time{std::chrono::system_clock::from_time_t(0)}
format{config.getString(config_prefix + ".format")}, , dict_struct{dict_struct_}
sample_block{sample_block}, , url{config.getString(config_prefix + ".url", "")}
context(context), , update_field{config.getString(config_prefix + ".update_field", "")}
timeouts(ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef())) , format{config.getString(config_prefix + ".format")}
, sample_block{sample_block}
, context(context)
, timeouts(ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()))
{ {
} }
HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other) HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other)
: log(&Logger::get("HTTPDictionarySource")), : log(&Logger::get("HTTPDictionarySource"))
update_time{other.update_time}, , update_time{other.update_time}
dict_struct{other.dict_struct}, , dict_struct{other.dict_struct}
url{other.url}, , url{other.url}
update_field{other.update_field}, , update_field{other.update_field}
format{other.format}, , format{other.format}
sample_block{other.sample_block}, , sample_block{other.sample_block}
context(other.context), , context(other.context)
timeouts(ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef())) , timeouts(ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()))
{ {
} }
@ -72,8 +74,8 @@ BlockInputStreamPtr HTTPDictionarySource::loadAll()
{ {
LOG_TRACE(log, "loadAll " + toString()); LOG_TRACE(log, "loadAll " + toString());
Poco::URI uri(url); Poco::URI uri(url);
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_GET, auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
ReadWriteBufferFromHTTP::OutStreamCallback(), timeouts); uri, Poco::Net::HTTPRequest::HTTP_GET, ReadWriteBufferFromHTTP::OutStreamCallback(), timeouts);
auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size); auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size);
return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr)); return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr));
} }
@ -83,8 +85,8 @@ BlockInputStreamPtr HTTPDictionarySource::loadUpdatedAll()
Poco::URI uri(url); Poco::URI uri(url);
getUpdateFieldAndDate(uri); getUpdateFieldAndDate(uri);
LOG_TRACE(log, "loadUpdatedAll " + uri.toString()); LOG_TRACE(log, "loadUpdatedAll " + uri.toString());
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_GET, auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(
ReadWriteBufferFromHTTP::OutStreamCallback(), timeouts); uri, Poco::Net::HTTPRequest::HTTP_GET, ReadWriteBufferFromHTTP::OutStreamCallback(), timeouts);
auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size); auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size);
return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr)); return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr));
} }
@ -101,14 +103,12 @@ BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & id
}; };
Poco::URI uri(url); Poco::URI uri(url);
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, out_stream_callback, timeouts);
out_stream_callback, timeouts);
auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size); auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size);
return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr)); return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr));
} }
BlockInputStreamPtr HTTPDictionarySource::loadKeys( BlockInputStreamPtr HTTPDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
const Columns & key_columns, const std::vector<size_t> & requested_rows)
{ {
LOG_TRACE(log, "loadKeys " << toString() << " size = " << requested_rows.size()); LOG_TRACE(log, "loadKeys " << toString() << " size = " << requested_rows.size());
@ -120,8 +120,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadKeys(
}; };
Poco::URI uri(url); Poco::URI uri(url);
auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, auto in_ptr = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, out_stream_callback, timeouts);
out_stream_callback, timeouts);
auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size); auto input_stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size);
return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr)); return std::make_shared<OwningBlockInputStream<ReadWriteBufferFromHTTP>>(input_stream, std::move(in_ptr));
} }

View File

@ -1,22 +1,25 @@
#pragma once #pragma once
#include <Poco/URI.h>
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <common/LocalDateTime.h>
#include <IO/ConnectionTimeouts.h> #include <IO/ConnectionTimeouts.h>
#include <Poco/URI.h>
#include <common/LocalDateTime.h>
#include "DictionaryStructure.h"
#include "IDictionarySource.h"
namespace Poco { class Logger; } namespace Poco
{
class Logger;
}
namespace DB namespace DB
{ {
/// Allows loading dictionaries from http[s] source /// Allows loading dictionaries from http[s] source
class HTTPDictionarySource final : public IDictionarySource class HTTPDictionarySource final : public IDictionarySource
{ {
public: public:
HTTPDictionarySource(const DictionaryStructure & dict_struct_, HTTPDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
Block & sample_block, Block & sample_block,
@ -30,8 +33,7 @@ public:
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override; BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys( BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
bool isModified() const override; bool isModified() const override;

View File

@ -1,11 +1,10 @@
#include <ext/size.h>
#include "HashedDictionary.h" #include "HashedDictionary.h"
#include <ext/size.h>
#include "DictionaryBlockInputStream.h" #include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h" #include "DictionaryFactory.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -16,10 +15,19 @@ namespace ErrorCodes
} }
HashedDictionary::HashedDictionary(const std::string & name, const DictionaryStructure & dict_struct, HashedDictionary::HashedDictionary(
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block) const std::string & name,
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), const DictionaryStructure & dict_struct,
require_nonempty(require_nonempty), saved_block{std::move(saved_block)} DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
bool require_nonempty,
BlockPtr saved_block)
: name{name}
, dict_struct(dict_struct)
, source_ptr{std::move(source_ptr)}
, dict_lifetime(dict_lifetime)
, require_nonempty(require_nonempty)
, saved_block{std::move(saved_block)}
{ {
createAttributes(); createAttributes();
@ -37,7 +45,8 @@ HashedDictionary::HashedDictionary(const std::string & name, const DictionaryStr
} }
HashedDictionary::HashedDictionary(const HashedDictionary & other) HashedDictionary::HashedDictionary(const HashedDictionary & other)
: HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block} : HashedDictionary{
other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block}
{ {
} }
@ -46,21 +55,26 @@ void HashedDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<
{ {
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values); const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
getItemsNumber<UInt64>(*hierarchical_attribute, ids, getItemsNumber<UInt64>(
*hierarchical_attribute,
ids,
[&](const size_t row, const UInt64 value) { out[row] = value; }, [&](const size_t row, const UInt64 value) { out[row] = value; },
[&](const size_t) { return null_value; }); [&](const size_t) { return null_value; });
} }
/// Allow to use single value in same way as array. /// Allow to use single value in same way as array.
static inline HashedDictionary::Key getAt(const PaddedPODArray<HashedDictionary::Key> & arr, const size_t idx) { return arr[idx]; } static inline HashedDictionary::Key getAt(const PaddedPODArray<HashedDictionary::Key> & arr, const size_t idx)
static inline HashedDictionary::Key getAt(const HashedDictionary::Key & value, const size_t) { return value; } {
return arr[idx];
}
static inline HashedDictionary::Key getAt(const HashedDictionary::Key & value, const size_t)
{
return value;
}
template <typename ChildType, typename AncestorType> template <typename ChildType, typename AncestorType>
void HashedDictionary::isInImpl( void HashedDictionary::isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
const ChildType & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values); const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
const auto & attr = *std::get<CollectionPtrType<Key>>(hierarchical_attribute->maps); const auto & attr = *std::get<CollectionPtrType<Key>>(hierarchical_attribute->maps);
@ -87,42 +101,35 @@ void HashedDictionary::isInImpl(
} }
void HashedDictionary::isInVectorVector( void HashedDictionary::isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
const PaddedPODArray<Key> & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_ids, ancestor_ids, out); isInImpl(child_ids, ancestor_ids, out);
} }
void HashedDictionary::isInVectorConstant( void HashedDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
const PaddedPODArray<Key> & child_ids,
const Key ancestor_id,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_ids, ancestor_id, out); isInImpl(child_ids, ancestor_id, out);
} }
void HashedDictionary::isInConstantVector( void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
const Key child_id,
const PaddedPODArray<Key> & ancestor_ids,
PaddedPODArray<UInt8> & out) const
{ {
isInImpl(child_id, ancestor_ids, out); isInImpl(child_id, ancestor_ids, out);
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const\ void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
const \
{ \ { \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
const auto null_value = std::get<TYPE>(attribute.null_values); \ const auto null_value = std::get<TYPE>(attribute.null_values); \
\ \
getItemsNumber<TYPE>(attribute, ids,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
[&] (const size_t) { return null_value; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -144,27 +151,32 @@ void HashedDictionary::getString(const std::string & attribute_name, const Padde
{ {
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
const auto & null_value = StringRef{std::get<String>(attribute.null_values)}; const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(attribute, ids, getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; }); [&](const size_t) { return null_value; });
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void HashedDictionary::get##TYPE( \ void HashedDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<TYPE> & def,\ const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \ ResultArrayType<TYPE> & out) const \
{ \ { \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, ids,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
[&] (const size_t row) { return def[row]; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -183,14 +195,16 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void HashedDictionary::getString( void HashedDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
ColumnString * const out) const
{ {
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, ids, getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); }); [&](const size_t row) { return def->getDataAt(row); });
} }
@ -201,11 +215,11 @@ void HashedDictionary::get##TYPE(\
{ \ { \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, ids,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
[&] (const size_t) { return def; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -224,14 +238,16 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void HashedDictionary::getString( void HashedDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
ColumnString * const out) const
{ {
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, ids, getItemsImpl<StringRef, StringRef>(
attribute,
ids,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; }); [&](const size_t) { return StringRef{def}; });
} }
@ -242,22 +258,52 @@ void HashedDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: has<UInt8>(attribute, ids, out); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: has<UInt16>(attribute, ids, out); break; has<UInt8>(attribute, ids, out);
case AttributeUnderlyingType::UInt32: has<UInt32>(attribute, ids, out); break; break;
case AttributeUnderlyingType::UInt64: has<UInt64>(attribute, ids, out); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: has<UInt128>(attribute, ids, out); break; has<UInt16>(attribute, ids, out);
case AttributeUnderlyingType::Int8: has<Int8>(attribute, ids, out); break; break;
case AttributeUnderlyingType::Int16: has<Int16>(attribute, ids, out); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: has<Int32>(attribute, ids, out); break; has<UInt32>(attribute, ids, out);
case AttributeUnderlyingType::Int64: has<Int64>(attribute, ids, out); break; break;
case AttributeUnderlyingType::Float32: has<Float32>(attribute, ids, out); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: has<Float64>(attribute, ids, out); break; has<UInt64>(attribute, ids, out);
case AttributeUnderlyingType::String: has<StringRef>(attribute, ids, out); break; break;
case AttributeUnderlyingType::UInt128:
has<UInt128>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int8:
has<Int8>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int16:
has<Int16>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int32:
has<Int32>(attribute, ids, out);
break;
case AttributeUnderlyingType::Int64:
has<Int64>(attribute, ids, out);
break;
case AttributeUnderlyingType::Float32:
has<Float32>(attribute, ids, out);
break;
case AttributeUnderlyingType::Float64:
has<Float64>(attribute, ids, out);
break;
case AttributeUnderlyingType::String:
has<StringRef>(attribute, ids, out);
break;
case AttributeUnderlyingType::Decimal32: has<Decimal32>(attribute, ids, out); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: has<Decimal64>(attribute, ids, out); break; has<Decimal32>(attribute, ids, out);
case AttributeUnderlyingType::Decimal128: has<Decimal128>(attribute, ids, out); break; break;
case AttributeUnderlyingType::Decimal64:
has<Decimal64>(attribute, ids, out);
break;
case AttributeUnderlyingType::Decimal128:
has<Decimal128>(attribute, ids, out);
break;
} }
} }
@ -402,21 +448,49 @@ void HashedDictionary::calculateBytesAllocated()
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: addAttributeSize<UInt8>(attribute); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: addAttributeSize<UInt16>(attribute); break; addAttributeSize<UInt8>(attribute);
case AttributeUnderlyingType::UInt32: addAttributeSize<UInt32>(attribute); break; break;
case AttributeUnderlyingType::UInt64: addAttributeSize<UInt64>(attribute); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: addAttributeSize<UInt128>(attribute); break; addAttributeSize<UInt16>(attribute);
case AttributeUnderlyingType::Int8: addAttributeSize<Int8>(attribute); break; break;
case AttributeUnderlyingType::Int16: addAttributeSize<Int16>(attribute); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: addAttributeSize<Int32>(attribute); break; addAttributeSize<UInt32>(attribute);
case AttributeUnderlyingType::Int64: addAttributeSize<Int64>(attribute); break; break;
case AttributeUnderlyingType::Float32: addAttributeSize<Float32>(attribute); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: addAttributeSize<Float64>(attribute); break; addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::UInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::Int8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::Int16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::Int32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::Int64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::Float32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::Float64:
addAttributeSize<Float64>(attribute);
break;
case AttributeUnderlyingType::Decimal32: addAttributeSize<Decimal32>(attribute); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: addAttributeSize<Decimal64>(attribute); break; addAttributeSize<Decimal32>(attribute);
case AttributeUnderlyingType::Decimal128: addAttributeSize<Decimal128>(attribute); break; break;
case AttributeUnderlyingType::Decimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::Decimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -442,21 +516,49 @@ HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const Attr
switch (type) switch (type)
{ {
case AttributeUnderlyingType::UInt8: createAttributeImpl<UInt8>(attr, null_value); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: createAttributeImpl<UInt16>(attr, null_value); break; createAttributeImpl<UInt8>(attr, null_value);
case AttributeUnderlyingType::UInt32: createAttributeImpl<UInt32>(attr, null_value); break; break;
case AttributeUnderlyingType::UInt64: createAttributeImpl<UInt64>(attr, null_value); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: createAttributeImpl<UInt128>(attr, null_value); break; createAttributeImpl<UInt16>(attr, null_value);
case AttributeUnderlyingType::Int8: createAttributeImpl<Int8>(attr, null_value); break; break;
case AttributeUnderlyingType::Int16: createAttributeImpl<Int16>(attr, null_value); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: createAttributeImpl<Int32>(attr, null_value); break; createAttributeImpl<UInt32>(attr, null_value);
case AttributeUnderlyingType::Int64: createAttributeImpl<Int64>(attr, null_value); break; break;
case AttributeUnderlyingType::Float32: createAttributeImpl<Float32>(attr, null_value); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: createAttributeImpl<Float64>(attr, null_value); break; createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::UInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::Int8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::Int16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::Int32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::Int64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::Float32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::Float64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal32: createAttributeImpl<Decimal32>(attr, null_value); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: createAttributeImpl<Decimal64>(attr, null_value); break; createAttributeImpl<Decimal32>(attr, null_value);
case AttributeUnderlyingType::Decimal128: createAttributeImpl<Decimal128>(attr, null_value); break; break;
case AttributeUnderlyingType::Decimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -473,12 +575,11 @@ HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const Attr
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void HashedDictionary::getItemsNumber( void HashedDictionary::getItemsNumber(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
if (false) {} if (false)
{
}
#define DISPATCH(TYPE) \ #define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) \
getItemsImpl<TYPE, OutputType>(attribute, ids, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default)); getItemsImpl<TYPE, OutputType>(attribute, ids, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
@ -497,16 +598,12 @@ void HashedDictionary::getItemsNumber(
DISPATCH(Decimal64) DISPATCH(Decimal64)
DISPATCH(Decimal128) DISPATCH(Decimal128)
#undef DISPATCH #undef DISPATCH
else else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
} }
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void HashedDictionary::getItemsImpl( void HashedDictionary::getItemsImpl(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
const auto & attr = *std::get<CollectionPtrType<AttributeType>>(attribute.maps); const auto & attr = *std::get<CollectionPtrType<AttributeType>>(attribute.maps);
const auto rows = ext::size(ids); const auto rows = ext::size(ids);
@ -532,21 +629,49 @@ void HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, co
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>()); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>()); break; setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::UInt32: setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>()); break; break;
case AttributeUnderlyingType::UInt64: setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>()); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>()); break; setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::Int8: setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>()); break; break;
case AttributeUnderlyingType::Int16: setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>()); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>()); break; setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
case AttributeUnderlyingType::Int64: setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>()); break; break;
case AttributeUnderlyingType::Float32: setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>()); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>()); break; setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
break;
case AttributeUnderlyingType::UInt128:
setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
break;
case AttributeUnderlyingType::Int8:
setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Int16:
setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Int32:
setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Int64:
setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
break;
case AttributeUnderlyingType::Float32:
setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::Float64:
setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
break;
case AttributeUnderlyingType::Decimal32: setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>()); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>()); break; setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
case AttributeUnderlyingType::Decimal128: setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>()); break; break;
case AttributeUnderlyingType::Decimal64:
setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
break;
case AttributeUnderlyingType::Decimal128:
setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -599,22 +724,37 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: return getIds<UInt8>(attribute); case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: return getIds<UInt16>(attribute); return getIds<UInt8>(attribute);
case AttributeUnderlyingType::UInt32: return getIds<UInt32>(attribute); case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt64: return getIds<UInt64>(attribute); return getIds<UInt16>(attribute);
case AttributeUnderlyingType::UInt128: return getIds<UInt128>(attribute); case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int8: return getIds<Int8>(attribute); return getIds<UInt32>(attribute);
case AttributeUnderlyingType::Int16: return getIds<Int16>(attribute); case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Int32: return getIds<Int32>(attribute); return getIds<UInt64>(attribute);
case AttributeUnderlyingType::Int64: return getIds<Int64>(attribute); case AttributeUnderlyingType::UInt128:
case AttributeUnderlyingType::Float32: return getIds<Float32>(attribute); return getIds<UInt128>(attribute);
case AttributeUnderlyingType::Float64: return getIds<Float64>(attribute); case AttributeUnderlyingType::Int8:
case AttributeUnderlyingType::String: return getIds<StringRef>(attribute); return getIds<Int8>(attribute);
case AttributeUnderlyingType::Int16:
return getIds<Int16>(attribute);
case AttributeUnderlyingType::Int32:
return getIds<Int32>(attribute);
case AttributeUnderlyingType::Int64:
return getIds<Int64>(attribute);
case AttributeUnderlyingType::Float32:
return getIds<Float32>(attribute);
case AttributeUnderlyingType::Float64:
return getIds<Float64>(attribute);
case AttributeUnderlyingType::String:
return getIds<StringRef>(attribute);
case AttributeUnderlyingType::Decimal32: return getIds<Decimal32>(attribute); case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: return getIds<Decimal64>(attribute); return getIds<Decimal32>(attribute);
case AttributeUnderlyingType::Decimal128: return getIds<Decimal128>(attribute); case AttributeUnderlyingType::Decimal64:
return getIds<Decimal64>(attribute);
case AttributeUnderlyingType::Decimal128:
return getIds<Decimal128>(attribute);
} }
return PaddedPODArray<Key>(); return PaddedPODArray<Key>();
} }
@ -627,13 +767,12 @@ BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_n
void registerDictionaryHashed(DictionaryFactory & factory) void registerDictionaryHashed(DictionaryFactory & factory)
{ {
auto create_layout = [=]( auto create_layout = [=](const std::string & name,
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
DictionarySourcePtr source_ptr DictionarySourcePtr source_ptr) -> DictionaryPtr
) -> DictionaryPtr { {
if (dict_struct.key) if (dict_struct.key)
throw Exception{"'key' is not supported for dictionary of layout 'hashed'", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{"'key' is not supported for dictionary of layout 'hashed'", ErrorCodes::UNSUPPORTED_METHOD};
@ -645,7 +784,6 @@ void registerDictionaryHashed(DictionaryFactory & factory)
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
return std::make_unique<HashedDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); return std::make_unique<HashedDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
}; };
factory.registerLayout("hashed", create_layout); factory.registerLayout("hashed", create_layout);
} }

View File

@ -1,27 +1,31 @@
#pragma once #pragma once
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <ext/range.h>
#include <atomic> #include <atomic>
#include <memory> #include <memory>
#include <variant> #include <variant>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/HashTable/HashMap.h>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace DB namespace DB
{ {
using BlockPtr = std::shared_ptr<Block>; using BlockPtr = std::shared_ptr<Block>;
class HashedDictionary final : public IDictionary class HashedDictionary final : public IDictionary
{ {
public: public:
HashedDictionary(const std::string & name, const DictionaryStructure & dict_struct, HashedDictionary(
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block = nullptr); const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
bool require_nonempty,
BlockPtr saved_block = nullptr);
HashedDictionary(const HashedDictionary & other); HashedDictionary(const HashedDictionary & other);
@ -51,10 +55,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; } const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
@ -90,7 +91,9 @@ public:
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE( \ void get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<TYPE> & def,\ const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const; ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -108,13 +111,13 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString( void
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
ColumnString * const out) const; const;
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE(\ void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) const; const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
DECLARE(UInt32) DECLARE(UInt32)
@ -131,39 +134,60 @@ public:
DECLARE(Decimal128) DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void getString( void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def,
ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override; void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; void isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override; void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private: private:
template <typename Value> using CollectionType = HashMap<UInt64, Value>; template <typename Value>
template <typename Value> using CollectionPtrType = std::unique_ptr<CollectionType<Value>>; using CollectionType = HashMap<UInt64, Value>;
template <typename Value>
using CollectionPtrType = std::unique_ptr<CollectionType<Value>>;
struct Attribute final struct Attribute final
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::variant< std::variant<
UInt8, UInt16, UInt32, UInt64, UInt8,
UInt16,
UInt32,
UInt64,
UInt128, UInt128,
Int8, Int16, Int32, Int64, Int8,
Decimal32, Decimal64, Decimal128, Int16,
Float32, Float64, Int32,
String> null_values; Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant< std::variant<
CollectionPtrType<UInt8>, CollectionPtrType<UInt16>, CollectionPtrType<UInt32>, CollectionPtrType<UInt64>, CollectionPtrType<UInt8>,
CollectionPtrType<UInt16>,
CollectionPtrType<UInt32>,
CollectionPtrType<UInt64>,
CollectionPtrType<UInt128>, CollectionPtrType<UInt128>,
CollectionPtrType<Int8>, CollectionPtrType<Int16>, CollectionPtrType<Int32>, CollectionPtrType<Int64>, CollectionPtrType<Int8>,
CollectionPtrType<Decimal32>, CollectionPtrType<Decimal64>, CollectionPtrType<Decimal128>, CollectionPtrType<Int16>,
CollectionPtrType<Float32>, CollectionPtrType<Float64>, CollectionPtrType<Int32>,
CollectionPtrType<StringRef>> maps; CollectionPtrType<Int64>,
CollectionPtrType<Decimal32>,
CollectionPtrType<Decimal64>,
CollectionPtrType<Decimal128>,
CollectionPtrType<Float32>,
CollectionPtrType<Float64>,
CollectionPtrType<StringRef>>
maps;
std::unique_ptr<Arena> string_arena; std::unique_ptr<Arena> string_arena;
}; };
@ -187,17 +211,11 @@ private:
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsNumber( void getItemsNumber(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl( void getItemsImpl(
const Attribute & attribute, const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
const PaddedPODArray<Key> & ids,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename T> template <typename T>
void setAttributeValueImpl(Attribute & attribute, const Key id, const T value); void setAttributeValueImpl(Attribute & attribute, const Key id, const T value);
@ -215,10 +233,7 @@ private:
PaddedPODArray<Key> getIds() const; PaddedPODArray<Key> getIds() const;
template <typename ChildType, typename AncestorType> template <typename ChildType, typename AncestorType>
void isInImpl( void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
const ChildType & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const;
const std::string name; const std::string name;
const DictionaryStructure dict_struct; const DictionaryStructure dict_struct;

View File

@ -1,18 +1,17 @@
#pragma once #pragma once
#include <chrono>
#include <memory>
#include <Core/Field.h> #include <Core/Field.h>
#include <Interpreters/IExternalLoadable.h>
#include <common/StringRef.h>
#include <Core/Names.h> #include <Core/Names.h>
#include <Interpreters/IExternalLoadable.h>
#include <Poco/Util/XMLConfiguration.h> #include <Poco/Util/XMLConfiguration.h>
#include <Common/PODArray.h> #include <Common/PODArray.h>
#include <memory> #include <common/StringRef.h>
#include <chrono>
#include "IDictionarySource.h" #include "IDictionarySource.h"
namespace DB namespace DB
{ {
struct IDictionaryBase; struct IDictionaryBase;
using DictionaryPtr = std::unique_ptr<IDictionaryBase>; using DictionaryPtr = std::unique_ptr<IDictionaryBase>;
@ -79,17 +78,20 @@ struct IDictionary : IDictionaryBase
/// Methods for hierarchy. /// Methods for hierarchy.
virtual void isInVectorVector(const PaddedPODArray<Key> & /*child_ids*/, const PaddedPODArray<Key> & /*ancestor_ids*/, PaddedPODArray<UInt8> & /*out*/) const virtual void isInVectorVector(
const PaddedPODArray<Key> & /*child_ids*/, const PaddedPODArray<Key> & /*ancestor_ids*/, PaddedPODArray<UInt8> & /*out*/) const
{ {
throw Exception("Hierarchy is not supported for " + getName() + " dictionary.", ErrorCodes::NOT_IMPLEMENTED); throw Exception("Hierarchy is not supported for " + getName() + " dictionary.", ErrorCodes::NOT_IMPLEMENTED);
} }
virtual void isInVectorConstant(const PaddedPODArray<Key> & /*child_ids*/, const Key /*ancestor_id*/, PaddedPODArray<UInt8> & /*out*/) const virtual void
isInVectorConstant(const PaddedPODArray<Key> & /*child_ids*/, const Key /*ancestor_id*/, PaddedPODArray<UInt8> & /*out*/) const
{ {
throw Exception("Hierarchy is not supported for " + getName() + " dictionary.", ErrorCodes::NOT_IMPLEMENTED); throw Exception("Hierarchy is not supported for " + getName() + " dictionary.", ErrorCodes::NOT_IMPLEMENTED);
} }
virtual void isInConstantVector(const Key /*child_id*/, const PaddedPODArray<Key> & /*ancestor_ids*/, PaddedPODArray<UInt8> & /*out*/) const virtual void
isInConstantVector(const Key /*child_id*/, const PaddedPODArray<Key> & /*ancestor_ids*/, PaddedPODArray<UInt8> & /*out*/) const
{ {
throw Exception("Hierarchy is not supported for " + getName() + " dictionary.", ErrorCodes::NOT_IMPLEMENTED); throw Exception("Hierarchy is not supported for " + getName() + " dictionary.", ErrorCodes::NOT_IMPLEMENTED);
} }

View File

@ -1,11 +1,10 @@
#pragma once #pragma once
#include <DataStreams/IBlockInputStream.h>
#include <vector> #include <vector>
#include <DataStreams/IBlockInputStream.h>
namespace DB namespace DB
{ {
class IDictionarySource; class IDictionarySource;
using DictionarySourcePtr = std::unique_ptr<IDictionarySource>; using DictionarySourcePtr = std::unique_ptr<IDictionarySource>;
@ -36,8 +35,7 @@ public:
* `requested_rows` contains indices of all rows containing unique keys. * `requested_rows` contains indices of all rows containing unique keys.
* It must be guaranteed, that 'requested_rows' array will live at least until all data will be read from returned stream. * It must be guaranteed, that 'requested_rows' array will live at least until all data will be read from returned stream.
*/ */
virtual BlockInputStreamPtr loadKeys( virtual BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) = 0;
const Columns & key_columns, const std::vector<size_t> & requested_rows) = 0;
/// indicates whether the source has been modified since last load* operation /// indicates whether the source has been modified since last load* operation
virtual bool isModified() const = 0; virtual bool isModified() const = 0;

View File

@ -1,6 +1,5 @@
#include <DataStreams/OneBlockInputStream.h>
#include "LibraryDictionarySource.h" #include "LibraryDictionarySource.h"
#include "LibraryDictionarySourceExternal.h" #include <DataStreams/OneBlockInputStream.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Poco/File.h> #include <Poco/File.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
@ -9,6 +8,7 @@
#include <ext/scope_guard.h> #include <ext/scope_guard.h>
#include "DictionarySourceFactory.h" #include "DictionarySourceFactory.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "LibraryDictionarySourceExternal.h"
namespace DB namespace DB
@ -78,7 +78,8 @@ namespace
auto columns_received = static_cast<const ClickHouseLibrary::Table *>(data); auto columns_received = static_cast<const ClickHouseLibrary::Table *>(data);
if (columns_received->error_code) if (columns_received->error_code)
throw Exception("LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " " throw Exception(
"LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " "
+ (columns_received->error_string ? columns_received->error_string : ""), + (columns_received->error_string ? columns_received->error_string : ""),
ErrorCodes::EXTERNAL_LIBRARY_ERROR); ErrorCodes::EXTERNAL_LIBRARY_ERROR);
@ -89,8 +90,9 @@ namespace
for (size_t col_n = 0; col_n < columns_received->size; ++col_n) for (size_t col_n = 0; col_n < columns_received->size; ++col_n)
{ {
if (columns.size() != columns_received->data[col_n].size) if (columns.size() != columns_received->data[col_n].size)
throw Exception("LibraryDictionarySource: Returned unexpected number of columns: " throw Exception(
+ std::to_string(columns_received->data[col_n].size) + ", must be " + std::to_string(columns.size()), "LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size)
+ ", must be " + std::to_string(columns.size()),
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n) for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n)
@ -115,7 +117,8 @@ namespace
} }
LibraryDictionarySource::LibraryDictionarySource(const DictionaryStructure & dict_struct_, LibraryDictionarySource::LibraryDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
Block & sample_block, Block & sample_block,
@ -128,7 +131,8 @@ LibraryDictionarySource::LibraryDictionarySource(const DictionaryStructure & dic
, context(context) , context(context)
{ {
if (!Poco::File(path).exists()) if (!Poco::File(path).exists())
throw Exception("LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist", throw Exception(
"LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist",
ErrorCodes::FILE_DOESNT_EXIST); ErrorCodes::FILE_DOESNT_EXIST);
description.init(sample_block); description.init(sample_block);
library = std::make_shared<SharedLibrary>(path); library = std::make_shared<SharedLibrary>(path);
@ -151,7 +155,8 @@ LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource &
{ {
if (auto libClone = library->tryGet<decltype(lib_data) (*)(decltype(other.lib_data))>("ClickHouseDictionary_v3_libClone")) if (auto libClone = library->tryGet<decltype(lib_data) (*)(decltype(other.lib_data))>("ClickHouseDictionary_v3_libClone"))
lib_data = libClone(other.lib_data); lib_data = libClone(other.lib_data);
else if (auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>( else if (
auto libNew = library->tryGet<decltype(lib_data) (*)(decltype(&settings->strings), decltype(&ClickHouseLibrary::log))>(
"ClickHouseDictionary_v3_libNew")) "ClickHouseDictionary_v3_libNew"))
lib_data = libNew(&settings->strings, ClickHouseLibrary::log); lib_data = libNew(&settings->strings, ClickHouseLibrary::log);
} }
@ -167,8 +172,8 @@ BlockInputStreamPtr LibraryDictionarySource::loadAll()
LOG_TRACE(log, "loadAll " + toString()); LOG_TRACE(log, "loadAll " + toString());
auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size()); auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size());
ClickHouseLibrary::CStrings columns{ ClickHouseLibrary::CStrings columns{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()),
static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), dict_struct.attributes.size()}; dict_struct.attributes.size()};
size_t i = 0; size_t i = 0;
for (auto & a : dict_struct.attributes) for (auto & a : dict_struct.attributes)
{ {
@ -193,8 +198,8 @@ BlockInputStreamPtr LibraryDictionarySource::loadIds(const std::vector<UInt64> &
const ClickHouseLibrary::VectorUInt64 ids_data{ext::bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()}; const ClickHouseLibrary::VectorUInt64 ids_data{ext::bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()};
auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size()); auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(dict_struct.attributes.size());
ClickHouseLibrary::CStrings columns_pass{ ClickHouseLibrary::CStrings columns_pass{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()),
static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), dict_struct.attributes.size()}; dict_struct.attributes.size()};
size_t i = 0; size_t i = 0;
for (auto & a : dict_struct.attributes) for (auto & a : dict_struct.attributes)
{ {

View File

@ -1,10 +1,10 @@
#pragma once #pragma once
#include <Common/SharedLibrary.h>
#include <common/LocalDateTime.h>
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "ExternalResultDescription.h" #include "ExternalResultDescription.h"
#include "IDictionarySource.h" #include "IDictionarySource.h"
#include <Common/SharedLibrary.h>
#include <common/LocalDateTime.h>
namespace Poco namespace Poco
@ -28,7 +28,8 @@ class CStringsHolder;
class LibraryDictionarySource final : public IDictionarySource class LibraryDictionarySource final : public IDictionarySource
{ {
public: public:
LibraryDictionarySource(const DictionaryStructure & dict_struct_, LibraryDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
Block & sample_block, Block & sample_block,
@ -54,10 +55,7 @@ public:
bool supportsSelectiveLoad() const override; bool supportsSelectiveLoad() const override;
///Not yet supported ///Not yet supported
bool hasUpdateField() const override bool hasUpdateField() const override { return false; }
{
return false;
}
DictionarySourcePtr clone() const override; DictionarySourcePtr clone() const override;

View File

@ -1,29 +1,28 @@
#include <Common/config.h> #include <Common/config.h>
#if USE_POCO_MONGODB #if USE_POCO_MONGODB
#include <vector>
#include <string>
# include <sstream> # include <sstream>
# include <string>
# include <vector>
# include <Poco/MongoDB/Connection.h> # include <Poco/MongoDB/Connection.h>
# include <Poco/MongoDB/Cursor.h> # include <Poco/MongoDB/Cursor.h>
# include <Poco/MongoDB/Element.h> # include <Poco/MongoDB/Element.h>
# include <Poco/MongoDB/ObjectId.h> # include <Poco/MongoDB/ObjectId.h>
#include "DictionaryStructure.h" # include <Columns/ColumnNullable.h>
#include "MongoDBBlockInputStream.h"
# include <Columns/ColumnString.h> # include <Columns/ColumnString.h>
# include <Columns/ColumnsNumber.h> # include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Common/FieldVisitors.h>
#include <IO/WriteHelpers.h>
# include <IO/ReadHelpers.h> # include <IO/ReadHelpers.h>
# include <IO/WriteHelpers.h>
# include <Common/FieldVisitors.h>
# include <ext/range.h> # include <ext/range.h>
# include "DictionaryStructure.h"
# include "MongoDBBlockInputStream.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -55,16 +54,20 @@ namespace
switch (value.type()) switch (value.type())
{ {
case Poco::MongoDB::ElementTraits<Int32>::TypeId: case Poco::MongoDB::ElementTraits<Int32>::TypeId:
static_cast<ColumnVector<T> &>(column).getData().push_back(static_cast<const Poco::MongoDB::ConcreteElement<Int32> &>(value).value()); static_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<Int32> &>(value).value());
break; break;
case Poco::MongoDB::ElementTraits<Poco::Int64>::TypeId: case Poco::MongoDB::ElementTraits<Poco::Int64>::TypeId:
static_cast<ColumnVector<T> &>(column).getData().push_back(static_cast<const Poco::MongoDB::ConcreteElement<Poco::Int64> &>(value).value()); static_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<Poco::Int64> &>(value).value());
break; break;
case Poco::MongoDB::ElementTraits<Float64>::TypeId: case Poco::MongoDB::ElementTraits<Float64>::TypeId:
static_cast<ColumnVector<T> &>(column).getData().push_back(static_cast<const Poco::MongoDB::ConcreteElement<Float64> &>(value).value()); static_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<Float64> &>(value).value());
break; break;
case Poco::MongoDB::ElementTraits<bool>::TypeId: case Poco::MongoDB::ElementTraits<bool>::TypeId:
static_cast<ColumnVector<T> &>(column).getData().push_back(static_cast<const Poco::MongoDB::ConcreteElement<bool> &>(value).value()); static_cast<ColumnVector<T> &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<bool> &>(value).value());
break; break;
case Poco::MongoDB::ElementTraits<Poco::MongoDB::NullValue>::TypeId: case Poco::MongoDB::ElementTraits<Poco::MongoDB::NullValue>::TypeId:
static_cast<ColumnVector<T> &>(column).getData().emplace_back(); static_cast<ColumnVector<T> &>(column).getData().emplace_back();
@ -74,26 +77,46 @@ namespace
parse<T>(static_cast<const Poco::MongoDB::ConcreteElement<String> &>(value).value())); parse<T>(static_cast<const Poco::MongoDB::ConcreteElement<String> &>(value).value()));
break; break;
default: default:
throw Exception("Type mismatch, expected a number, got type id = " + toString(value.type()) + throw Exception(
" for column " + name, ErrorCodes::TYPE_MISMATCH); "Type mismatch, expected a number, got type id = " + toString(value.type()) + " for column " + name,
ErrorCodes::TYPE_MISMATCH);
} }
} }
void insertValue( void insertValue(IColumn & column, const ValueType type, const Poco::MongoDB::Element & value, const std::string & name)
IColumn & column, const ValueType type, const Poco::MongoDB::Element & value, const std::string & name)
{ {
switch (type) switch (type)
{ {
case ValueType::UInt8: insertNumber<UInt8>(column, value, name); break; case ValueType::UInt8:
case ValueType::UInt16: insertNumber<UInt16>(column, value, name); break; insertNumber<UInt8>(column, value, name);
case ValueType::UInt32: insertNumber<UInt32>(column, value, name); break; break;
case ValueType::UInt64: insertNumber<UInt64>(column, value, name); break; case ValueType::UInt16:
case ValueType::Int8: insertNumber<Int8>(column, value, name); break; insertNumber<UInt16>(column, value, name);
case ValueType::Int16: insertNumber<Int16>(column, value, name); break; break;
case ValueType::Int32: insertNumber<Int32>(column, value, name); break; case ValueType::UInt32:
case ValueType::Int64: insertNumber<Int64>(column, value, name); break; insertNumber<UInt32>(column, value, name);
case ValueType::Float32: insertNumber<Float32>(column, value, name); break; break;
case ValueType::Float64: insertNumber<Float64>(column, value, name); break; case ValueType::UInt64:
insertNumber<UInt64>(column, value, name);
break;
case ValueType::Int8:
insertNumber<Int8>(column, value, name);
break;
case ValueType::Int16:
insertNumber<Int16>(column, value, name);
break;
case ValueType::Int32:
insertNumber<Int32>(column, value, name);
break;
case ValueType::Int64:
insertNumber<Int64>(column, value, name);
break;
case ValueType::Float32:
insertNumber<Float32>(column, value, name);
break;
case ValueType::Float64:
insertNumber<Float64>(column, value, name);
break;
case ValueType::String: case ValueType::String:
{ {
@ -110,18 +133,17 @@ namespace
break; break;
} }
throw Exception{"Type mismatch, expected String, got type id = " + toString(value.type()) + throw Exception{"Type mismatch, expected String, got type id = " + toString(value.type()) + " for column " + name,
" for column " + name, ErrorCodes::TYPE_MISMATCH}; ErrorCodes::TYPE_MISMATCH};
} }
case ValueType::Date: case ValueType::Date:
{ {
if (value.type() != Poco::MongoDB::ElementTraits<Poco::Timestamp>::TypeId) if (value.type() != Poco::MongoDB::ElementTraits<Poco::Timestamp>::TypeId)
throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name,
" for column " + name, ErrorCodes::TYPE_MISMATCH}; ErrorCodes::TYPE_MISMATCH};
static_cast<ColumnUInt16 &>(column).getData().push_back( static_cast<ColumnUInt16 &>(column).getData().push_back(UInt16{DateLUT::instance().toDayNum(
UInt16{DateLUT::instance().toDayNum(
static_cast<const Poco::MongoDB::ConcreteElement<Poco::Timestamp> &>(value).value().epochTime())}); static_cast<const Poco::MongoDB::ConcreteElement<Poco::Timestamp> &>(value).value().epochTime())});
break; break;
} }
@ -129,8 +151,8 @@ namespace
case ValueType::DateTime: case ValueType::DateTime:
{ {
if (value.type() != Poco::MongoDB::ElementTraits<Poco::Timestamp>::TypeId) if (value.type() != Poco::MongoDB::ElementTraits<Poco::Timestamp>::TypeId)
throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name,
" for column " + name, ErrorCodes::TYPE_MISMATCH}; ErrorCodes::TYPE_MISMATCH};
static_cast<ColumnUInt32 &>(column).getData().push_back( static_cast<ColumnUInt32 &>(column).getData().push_back(
static_cast<const Poco::MongoDB::ConcreteElement<Poco::Timestamp> &>(value).value().epochTime()); static_cast<const Poco::MongoDB::ConcreteElement<Poco::Timestamp> &>(value).value().epochTime());
@ -144,17 +166,15 @@ namespace
static_cast<ColumnUInt128 &>(column).getData().push_back(parse<UUID>(string)); static_cast<ColumnUInt128 &>(column).getData().push_back(parse<UUID>(string));
} }
else else
throw Exception{"Type mismatch, expected String (UUID), got type id = " + toString(value.type()) + throw Exception{"Type mismatch, expected String (UUID), got type id = " + toString(value.type()) + " for column "
" for column " + name, ErrorCodes::TYPE_MISMATCH}; + name,
ErrorCodes::TYPE_MISMATCH};
break; break;
} }
} }
} }
void insertDefaultValue(IColumn & column, const IColumn & sample_column) void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); }
{
column.insertFrom(sample_column, 0);
}
} }

View File

@ -17,7 +17,6 @@ namespace Poco
namespace DB namespace DB
{ {
/// Converts MongoDB Cursor to a stream of Blocks /// Converts MongoDB Cursor to a stream of Blocks
class MongoDBBlockInputStream final : public IProfilingBlockInputStream class MongoDBBlockInputStream final : public IProfilingBlockInputStream
{ {

View File

@ -1,10 +1,9 @@
#include "MongoDBDictionarySource.h"
#include "DictionarySourceFactory.h" #include "DictionarySourceFactory.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "MongoDBDictionarySource.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int SUPPORT_IS_DISABLED; extern const int SUPPORT_IS_DISABLED;
@ -36,27 +35,26 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory)
#if USE_POCO_MONGODB #if USE_POCO_MONGODB
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/MongoDB/Connection.h>
#include <Poco/MongoDB/Database.h>
#include <Poco/MongoDB/Cursor.h>
# include <Poco/MongoDB/Array.h> # include <Poco/MongoDB/Array.h>
# include <Poco/MongoDB/Connection.h>
# include <Poco/MongoDB/Cursor.h>
# include <Poco/MongoDB/Database.h>
# include <Poco/MongoDB/ObjectId.h> # include <Poco/MongoDB/ObjectId.h>
# include <Poco/Util/AbstractConfiguration.h>
# include <Poco/Version.h> # include <Poco/Version.h>
// only after poco // only after poco
// naming conflict: // naming conflict:
// Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value); // Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value);
// dbms/src/IO/WriteHelpers.h:146 #define writeCString(s, buf) // dbms/src/IO/WriteHelpers.h:146 #define writeCString(s, buf)
#include "MongoDBBlockInputStream.h"
#include <Common/FieldVisitors.h>
# include <IO/WriteHelpers.h> # include <IO/WriteHelpers.h>
# include <Common/FieldVisitors.h>
# include <ext/enumerate.h> # include <ext/enumerate.h>
# include "MongoDBBlockInputStream.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNSUPPORTED_METHOD; extern const int UNSUPPORTED_METHOD;
@ -70,8 +68,8 @@ static const size_t max_block_size = 8192;
# if POCO_VERSION < 0x01070800 # if POCO_VERSION < 0x01070800
/// See https://pocoproject.org/forum/viewtopic.php?f=10&t=6326&p=11426&hilit=mongodb+auth#p11485 /// See https://pocoproject.org/forum/viewtopic.php?f=10&t=6326&p=11426&hilit=mongodb+auth#p11485
static void authenticate(Poco::MongoDB::Connection & connection, static void
const std::string & database, const std::string & user, const std::string & password) authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password)
{ {
Poco::MongoDB::Database db(database); Poco::MongoDB::Database db(database);
@ -88,7 +86,8 @@ static void authenticate(Poco::MongoDB::Connection & connection,
connection.sendRequest(*command, response); connection.sendRequest(*command, response);
if (response.documents().empty()) if (response.documents().empty())
throw Exception("Cannot authenticate in MongoDB: server returned empty response for 'getnonce' command", throw Exception(
"Cannot authenticate in MongoDB: server returned empty response for 'getnonce' command",
ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
auto doc = response.documents()[0]; auto doc = response.documents()[0];
@ -96,18 +95,24 @@ static void authenticate(Poco::MongoDB::Connection & connection,
{ {
double ok = doc->get<double>("ok", 0); double ok = doc->get<double>("ok", 0);
if (ok != 1) if (ok != 1)
throw Exception("Cannot authenticate in MongoDB: server returned response for 'getnonce' command that" throw Exception(
" has field 'ok' missing or having wrong value", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that"
" has field 'ok' missing or having wrong value",
ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
nonce = doc->get<std::string>("nonce", ""); nonce = doc->get<std::string>("nonce", "");
if (nonce.empty()) if (nonce.empty())
throw Exception("Cannot authenticate in MongoDB: server returned response for 'getnonce' command that" throw Exception(
" has field 'nonce' missing or empty", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that"
" has field 'nonce' missing or empty",
ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
} }
catch (Poco::NotFoundException & e) catch (Poco::NotFoundException & e)
{ {
throw Exception("Cannot authenticate in MongoDB: server returned response for 'getnonce' command that has missing required field: " throw Exception(
+ e.displayText(), ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that has missing required field: "
+ e.displayText(),
ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
} }
} }
@ -136,7 +141,8 @@ static void authenticate(Poco::MongoDB::Connection & connection,
connection.sendRequest(*command, response); connection.sendRequest(*command, response);
if (response.empty()) if (response.empty())
throw Exception("Cannot authenticate in MongoDB: server returned empty response for 'authenticate' command", throw Exception(
"Cannot authenticate in MongoDB: server returned empty response for 'authenticate' command",
ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
auto doc = response.documents()[0]; auto doc = response.documents()[0];
@ -144,13 +150,17 @@ static void authenticate(Poco::MongoDB::Connection & connection,
{ {
double ok = doc->get<double>("ok", 0); double ok = doc->get<double>("ok", 0);
if (ok != 1) if (ok != 1)
throw Exception("Cannot authenticate in MongoDB: server returned response for 'authenticate' command that" throw Exception(
" has field 'ok' missing or having wrong value", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that"
" has field 'ok' missing or having wrong value",
ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
} }
catch (Poco::NotFoundException & e) catch (Poco::NotFoundException & e)
{ {
throw Exception("Cannot authenticate in MongoDB: server returned response for 'authenticate' command that has missing required field: " throw Exception(
+ e.displayText(), ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that has missing required field: "
+ e.displayText(),
ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
} }
} }
} }
@ -158,15 +168,25 @@ static void authenticate(Poco::MongoDB::Connection & connection,
MongoDBDictionarySource::MongoDBDictionarySource( MongoDBDictionarySource::MongoDBDictionarySource(
const DictionaryStructure & dict_struct, const std::string & host, UInt16 port, const DictionaryStructure & dict_struct,
const std::string & user, const std::string & password, const std::string & host,
UInt16 port,
const std::string & user,
const std::string & password,
const std::string & method, const std::string & method,
const std::string & db, const std::string & collection, const std::string & db,
const std::string & collection,
const Block & sample_block) const Block & sample_block)
: dict_struct{dict_struct}, host{host}, port{port}, user{user}, password{password}, : dict_struct{dict_struct}
method{method}, , host{host}
db{db}, collection{collection}, sample_block{sample_block}, , port{port}
connection{std::make_shared<Poco::MongoDB::Connection>(host, port)} , user{user}
, password{password}
, method{method}
, db{db}
, collection{collection}
, sample_block{sample_block}
, connection{std::make_shared<Poco::MongoDB::Connection>(host, port)}
{ {
if (!user.empty()) if (!user.empty())
{ {
@ -176,14 +196,15 @@ MongoDBDictionarySource::MongoDBDictionarySource(
# else # else
authenticate(*connection, db, user, password); authenticate(*connection, db, user, password);
# endif # endif
} }
} }
MongoDBDictionarySource::MongoDBDictionarySource( MongoDBDictionarySource::MongoDBDictionarySource(
const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const DictionaryStructure & dict_struct,
const std::string & config_prefix, Block & sample_block) const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block)
: MongoDBDictionarySource( : MongoDBDictionarySource(
dict_struct, dict_struct,
config.getString(config_prefix + ".host"), config.getString(config_prefix + ".host"),
@ -199,10 +220,15 @@ MongoDBDictionarySource::MongoDBDictionarySource(
MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & other) MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & other)
: MongoDBDictionarySource{ : MongoDBDictionarySource{other.dict_struct,
other.dict_struct, other.host, other.port, other.user, other.password, other.host,
other.port,
other.user,
other.password,
other.method, other.method,
other.db, other.collection, other.sample_block} other.db,
other.collection,
other.sample_block}
{ {
} }
@ -210,8 +236,8 @@ MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource &
MongoDBDictionarySource::~MongoDBDictionarySource() = default; MongoDBDictionarySource::~MongoDBDictionarySource() = default;
static std::unique_ptr<Poco::MongoDB::Cursor> createCursor( static std::unique_ptr<Poco::MongoDB::Cursor>
const std::string & database, const std::string & collection, const Block & sample_block_to_select) createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select)
{ {
auto cursor = std::make_unique<Poco::MongoDB::Cursor>(database, collection); auto cursor = std::make_unique<Poco::MongoDB::Cursor>(database, collection);
@ -228,8 +254,7 @@ static std::unique_ptr<Poco::MongoDB::Cursor> createCursor(
BlockInputStreamPtr MongoDBDictionarySource::loadAll() BlockInputStreamPtr MongoDBDictionarySource::loadAll()
{ {
return std::make_shared<MongoDBBlockInputStream>( return std::make_shared<MongoDBBlockInputStream>(connection, createCursor(db, collection, sample_block), sample_block, max_block_size);
connection, createCursor(db, collection, sample_block), sample_block, max_block_size);
} }
@ -248,16 +273,13 @@ BlockInputStreamPtr MongoDBDictionarySource::loadIds(const std::vector<UInt64> &
for (const UInt64 id : ids) for (const UInt64 id : ids)
ids_array->add(DB::toString(id), Int32(id)); ids_array->add(DB::toString(id), Int32(id));
cursor->query().selector().addNewDocument(dict_struct.id->name) cursor->query().selector().addNewDocument(dict_struct.id->name).add("$in", ids_array);
.add("$in", ids_array);
return std::make_shared<MongoDBBlockInputStream>( return std::make_shared<MongoDBBlockInputStream>(connection, std::move(cursor), sample_block, max_block_size);
connection, std::move(cursor), sample_block, max_block_size);
} }
BlockInputStreamPtr MongoDBDictionarySource::loadKeys( BlockInputStreamPtr MongoDBDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
const Columns & key_columns, const std::vector<size_t> & requested_rows)
{ {
if (!dict_struct.key) if (!dict_struct.key)
throw Exception{"'key' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{"'key' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD};
@ -314,8 +336,7 @@ BlockInputStreamPtr MongoDBDictionarySource::loadKeys(
/// If more than one key we should use $or /// If more than one key we should use $or
cursor->query().selector().add("$or", keys_array); cursor->query().selector().add("$or", keys_array);
return std::make_shared<MongoDBBlockInputStream>( return std::make_shared<MongoDBBlockInputStream>(connection, std::move(cursor), sample_block, max_block_size);
connection, std::move(cursor), sample_block, max_block_size);
} }

View File

@ -3,8 +3,8 @@
#include <Common/config.h> #include <Common/config.h>
#if USE_POCO_MONGODB #if USE_POCO_MONGODB
#include "IDictionarySource.h"
# include "DictionaryStructure.h" # include "DictionaryStructure.h"
# include "IDictionarySource.h"
namespace Poco namespace Poco
{ {
@ -22,21 +22,26 @@ namespace Poco
namespace DB namespace DB
{ {
/// Allows loading dictionaries from a MongoDB collection /// Allows loading dictionaries from a MongoDB collection
class MongoDBDictionarySource final : public IDictionarySource class MongoDBDictionarySource final : public IDictionarySource
{ {
MongoDBDictionarySource( MongoDBDictionarySource(
const DictionaryStructure & dict_struct, const std::string & host, UInt16 port, const DictionaryStructure & dict_struct,
const std::string & user, const std::string & password, const std::string & host,
UInt16 port,
const std::string & user,
const std::string & password,
const std::string & method, const std::string & method,
const std::string & db, const std::string & collection, const std::string & db,
const std::string & collection,
const Block & sample_block); const Block & sample_block);
public: public:
MongoDBDictionarySource( MongoDBDictionarySource(
const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const DictionaryStructure & dict_struct,
const std::string & config_prefix, Block & sample_block); const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block);
MongoDBDictionarySource(const MongoDBDictionarySource & other); MongoDBDictionarySource(const MongoDBDictionarySource & other);
@ -53,8 +58,7 @@ public:
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override; BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys( BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
/// @todo: for MongoDB, modification date can somehow be determined from the `_id` object field /// @todo: for MongoDB, modification date can somehow be determined from the `_id` object field
bool isModified() const override { return true; } bool isModified() const override { return true; }

View File

@ -1,19 +1,18 @@
#include <Common/config.h> #include <Common/config.h>
#if USE_MYSQL #if USE_MYSQL
#include "MySQLBlockInputStream.h" # include <vector>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
# include <Columns/ColumnNullable.h> # include <Columns/ColumnNullable.h>
# include <Columns/ColumnString.h>
# include <Columns/ColumnsNumber.h>
# include <IO/ReadHelpers.h> # include <IO/ReadHelpers.h>
# include <IO/WriteHelpers.h> # include <IO/WriteHelpers.h>
# include <ext/range.h> # include <ext/range.h>
#include <vector> # include "MySQLBlockInputStream.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
@ -21,13 +20,12 @@ namespace ErrorCodes
MySQLBlockInputStream::MySQLBlockInputStream( MySQLBlockInputStream::MySQLBlockInputStream(
const mysqlxx::PoolWithFailover::Entry & entry, const std::string & query_str, const Block & sample_block, const mysqlxx::PoolWithFailover::Entry & entry, const std::string & query_str, const Block & sample_block, const size_t max_block_size)
const size_t max_block_size) : entry{entry}, query{this->entry->query(query_str)}, result{query.use()}, max_block_size{max_block_size}
: entry{entry}, query{this->entry->query(query_str)}, result{query.use()},
max_block_size{max_block_size}
{ {
if (sample_block.columns() != result.getNumFields()) if (sample_block.columns() != result.getNumFields())
throw Exception{"mysqlxx::UseQueryResult contains " + toString(result.getNumFields()) + " columns while " + toString(sample_block.columns()) + " expected", throw Exception{"mysqlxx::UseQueryResult contains " + toString(result.getNumFields()) + " columns while "
+ toString(sample_block.columns()) + " expected",
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH};
description.init(sample_block); description.init(sample_block);
@ -42,27 +40,52 @@ namespace
{ {
switch (type) switch (type)
{ {
case ValueType::UInt8: static_cast<ColumnUInt8 &>(column).insertValue(value.getUInt()); break; case ValueType::UInt8:
case ValueType::UInt16: static_cast<ColumnUInt16 &>(column).insertValue(value.getUInt()); break; static_cast<ColumnUInt8 &>(column).insertValue(value.getUInt());
case ValueType::UInt32: static_cast<ColumnUInt32 &>(column).insertValue(value.getUInt()); break; break;
case ValueType::UInt64: static_cast<ColumnUInt64 &>(column).insertValue(value.getUInt()); break; case ValueType::UInt16:
case ValueType::Int8: static_cast<ColumnInt8 &>(column).insertValue(value.getInt()); break; static_cast<ColumnUInt16 &>(column).insertValue(value.getUInt());
case ValueType::Int16: static_cast<ColumnInt16 &>(column).insertValue(value.getInt()); break; break;
case ValueType::Int32: static_cast<ColumnInt32 &>(column).insertValue(value.getInt()); break; case ValueType::UInt32:
case ValueType::Int64: static_cast<ColumnInt64 &>(column).insertValue(value.getInt()); break; static_cast<ColumnUInt32 &>(column).insertValue(value.getUInt());
case ValueType::Float32: static_cast<ColumnFloat32 &>(column).insertValue(value.getDouble()); break; break;
case ValueType::Float64: static_cast<ColumnFloat64 &>(column).insertValue(value.getDouble()); break; case ValueType::UInt64:
case ValueType::String: static_cast<ColumnString &>(column).insertData(value.data(), value.size()); break; static_cast<ColumnUInt64 &>(column).insertValue(value.getUInt());
case ValueType::Date: static_cast<ColumnUInt16 &>(column).insertValue(UInt16(value.getDate().getDayNum())); break; break;
case ValueType::DateTime: static_cast<ColumnUInt32 &>(column).insertValue(UInt32(value.getDateTime())); break; case ValueType::Int8:
case ValueType::UUID: static_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size())); break; static_cast<ColumnInt8 &>(column).insertValue(value.getInt());
break;
case ValueType::Int16:
static_cast<ColumnInt16 &>(column).insertValue(value.getInt());
break;
case ValueType::Int32:
static_cast<ColumnInt32 &>(column).insertValue(value.getInt());
break;
case ValueType::Int64:
static_cast<ColumnInt64 &>(column).insertValue(value.getInt());
break;
case ValueType::Float32:
static_cast<ColumnFloat32 &>(column).insertValue(value.getDouble());
break;
case ValueType::Float64:
static_cast<ColumnFloat64 &>(column).insertValue(value.getDouble());
break;
case ValueType::String:
static_cast<ColumnString &>(column).insertData(value.data(), value.size());
break;
case ValueType::Date:
static_cast<ColumnUInt16 &>(column).insertValue(UInt16(value.getDate().getDayNum()));
break;
case ValueType::DateTime:
static_cast<ColumnUInt32 &>(column).insertValue(UInt32(value.getDateTime()));
break;
case ValueType::UUID:
static_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
} }
} }
void insertDefaultValue(IColumn & column, const IColumn & sample_column) void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); }
{
column.insertFrom(sample_column, 0);
}
} }

View File

@ -1,22 +1,23 @@
#pragma once #pragma once
#include <string>
#include <Core/Block.h> #include <Core/Block.h>
#include <DataStreams/IProfilingBlockInputStream.h> #include <DataStreams/IProfilingBlockInputStream.h>
#include "ExternalResultDescription.h"
#include <mysqlxx/Query.h>
#include <mysqlxx/PoolWithFailover.h> #include <mysqlxx/PoolWithFailover.h>
#include <string> #include <mysqlxx/Query.h>
#include "ExternalResultDescription.h"
namespace DB namespace DB
{ {
/// Allows processing results of a MySQL query as a sequence of Blocks, simplifies chaining /// Allows processing results of a MySQL query as a sequence of Blocks, simplifies chaining
class MySQLBlockInputStream final : public IProfilingBlockInputStream class MySQLBlockInputStream final : public IProfilingBlockInputStream
{ {
public: public:
MySQLBlockInputStream( MySQLBlockInputStream(
const mysqlxx::PoolWithFailover::Entry & entry, const std::string & query_str, const Block & sample_block, const mysqlxx::PoolWithFailover::Entry & entry,
const std::string & query_str,
const Block & sample_block,
const size_t max_block_size); const size_t max_block_size);
String getName() const override { return "MySQL"; } String getName() const override { return "MySQL"; }

View File

@ -1,14 +1,13 @@
#include "MySQLDictionarySource.h" #include "MySQLDictionarySource.h"
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
#include <Common/config.h> #include <Common/config.h>
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int SUPPORT_IS_DISABLED; extern const int SUPPORT_IS_DISABLED;
@ -39,56 +38,59 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
#if USE_MYSQL #if USE_MYSQL
#include <IO/WriteBufferFromString.h>
#include <DataTypes/DataTypeString.h>
# include <Columns/ColumnString.h> # include <Columns/ColumnString.h>
#include <common/logger_useful.h> # include <DataTypes/DataTypeString.h>
# include <IO/WriteBufferFromString.h>
# include <IO/WriteHelpers.h>
# include <common/LocalDateTime.h> # include <common/LocalDateTime.h>
# include <common/logger_useful.h>
# include "MySQLBlockInputStream.h" # include "MySQLBlockInputStream.h"
# include "readInvalidateQuery.h" # include "readInvalidateQuery.h"
#include <IO/WriteHelpers.h>
namespace DB namespace DB
{ {
static const size_t max_block_size = 8192; static const size_t max_block_size = 8192;
MySQLDictionarySource::MySQLDictionarySource(const DictionaryStructure & dict_struct_, MySQLDictionarySource::MySQLDictionarySource(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const Block & sample_block) const Block & sample_block)
: log(&Logger::get("MySQLDictionarySource")), : log(&Logger::get("MySQLDictionarySource"))
update_time{std::chrono::system_clock::from_time_t(0)}, , update_time{std::chrono::system_clock::from_time_t(0)}
dict_struct{dict_struct_}, , dict_struct{dict_struct_}
db{config.getString(config_prefix + ".db", "")}, , db{config.getString(config_prefix + ".db", "")}
table{config.getString(config_prefix + ".table")}, , table{config.getString(config_prefix + ".table")}
where{config.getString(config_prefix + ".where", "")}, , where{config.getString(config_prefix + ".where", "")}
update_field{config.getString(config_prefix + ".update_field", "")}, , update_field{config.getString(config_prefix + ".update_field", "")}
dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)}, , dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)}
sample_block{sample_block}, , sample_block{sample_block}
pool{config, config_prefix}, , pool{config, config_prefix}
query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}
load_all_query{query_builder.composeLoadAllQuery()}, , load_all_query{query_builder.composeLoadAllQuery()}
invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
{ {
} }
/// copy-constructor is provided in order to support cloneability /// copy-constructor is provided in order to support cloneability
MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other) MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other)
: log(&Logger::get("MySQLDictionarySource")), : log(&Logger::get("MySQLDictionarySource"))
update_time{other.update_time}, , update_time{other.update_time}
dict_struct{other.dict_struct}, , dict_struct{other.dict_struct}
db{other.db}, , db{other.db}
table{other.table}, , table{other.table}
where{other.where}, , where{other.where}
update_field{other.update_field}, , update_field{other.update_field}
dont_check_update_time{other.dont_check_update_time}, , dont_check_update_time{other.dont_check_update_time}
sample_block{other.sample_block}, , sample_block{other.sample_block}
pool{other.pool}, , pool{other.pool}
query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}
load_all_query{other.load_all_query}, last_modification{other.last_modification}, , load_all_query{other.load_all_query}
invalidate_query{other.invalidate_query}, invalidate_query_response{other.invalidate_query_response} , last_modification{other.last_modification}
, invalidate_query{other.invalidate_query}
, invalidate_query_response{other.invalidate_query_response}
{ {
} }
@ -135,8 +137,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector<UInt64> & i
return std::make_shared<MySQLBlockInputStream>(pool.Get(), query, sample_block, max_block_size); return std::make_shared<MySQLBlockInputStream>(pool.Get(), query, sample_block, max_block_size);
} }
BlockInputStreamPtr MySQLDictionarySource::loadKeys( BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
const Columns & key_columns, const std::vector<size_t> & requested_rows)
{ {
/// We do not log in here and do not update the modification time, as the request can be large, and often called. /// We do not log in here and do not update the modification time, as the request can be large, and often called.

View File

@ -3,11 +3,11 @@
#include <Common/config.h> #include <Common/config.h>
#if USE_MYSQL #if USE_MYSQL
#include "IDictionarySource.h"
#include "ExternalQueryBuilder.h"
#include "DictionaryStructure.h"
# include <common/LocalDateTime.h> # include <common/LocalDateTime.h>
# include <mysqlxx/PoolWithFailover.h> # include <mysqlxx/PoolWithFailover.h>
# include "DictionaryStructure.h"
# include "ExternalQueryBuilder.h"
# include "IDictionarySource.h"
namespace Poco namespace Poco
@ -23,14 +23,14 @@ namespace Poco
namespace DB namespace DB
{ {
/// Allows loading dictionaries from a MySQL database /// Allows loading dictionaries from a MySQL database
class MySQLDictionarySource final : public IDictionarySource class MySQLDictionarySource final : public IDictionarySource
{ {
public: public:
MySQLDictionarySource(const DictionaryStructure & dict_struct_, MySQLDictionarySource(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const Block & sample_block); const Block & sample_block);
/// copy-constructor is provided in order to support cloneability /// copy-constructor is provided in order to support cloneability
@ -42,8 +42,7 @@ public:
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override; BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys( BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
bool isModified() const override; bool isModified() const override;

View File

@ -1,17 +1,16 @@
#include "ODBCBlockInputStream.h" #include "ODBCBlockInputStream.h"
#include <Columns/ColumnsNumber.h> #include <vector>
#include <Columns/ColumnString.h>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <ext/range.h> #include <ext/range.h>
#include <vector>
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
@ -19,19 +18,18 @@ namespace ErrorCodes
ODBCBlockInputStream::ODBCBlockInputStream( ODBCBlockInputStream::ODBCBlockInputStream(
Poco::Data::Session && session, const std::string & query_str, const Block & sample_block, Poco::Data::Session && session, const std::string & query_str, const Block & sample_block, const size_t max_block_size)
const size_t max_block_size) : session{session}
: , statement{(this->session << query_str, Poco::Data::Keywords::now)}
session{session}, , result{statement}
statement{(this->session << query_str, Poco::Data::Keywords::now)}, , iterator{result.begin()}
result{statement}, , max_block_size{max_block_size}
iterator{result.begin()}, , log(&Logger::get("ODBCBlockInputStream"))
max_block_size{max_block_size},
log(&Logger::get("ODBCBlockInputStream"))
{ {
if (sample_block.columns() != result.columnCount()) if (sample_block.columns() != result.columnCount())
throw Exception{"RecordSet contains " + toString(result.columnCount()) + " columns while " + throw Exception{"RecordSet contains " + toString(result.columnCount()) + " columns while " + toString(sample_block.columns())
toString(sample_block.columns()) + " expected", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; + " expected",
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH};
description.init(sample_block); description.init(sample_block);
} }
@ -45,28 +43,52 @@ namespace
{ {
switch (type) switch (type)
{ {
case ValueType::UInt8: static_cast<ColumnUInt8 &>(column).insertValue(value.convert<UInt64>()); break; case ValueType::UInt8:
case ValueType::UInt16: static_cast<ColumnUInt16 &>(column).insertValue(value.convert<UInt64>()); break; static_cast<ColumnUInt8 &>(column).insertValue(value.convert<UInt64>());
case ValueType::UInt32: static_cast<ColumnUInt32 &>(column).insertValue(value.convert<UInt64>()); break; break;
case ValueType::UInt64: static_cast<ColumnUInt64 &>(column).insertValue(value.convert<UInt64>()); break; case ValueType::UInt16:
case ValueType::Int8: static_cast<ColumnInt8 &>(column).insertValue(value.convert<Int64>()); break; static_cast<ColumnUInt16 &>(column).insertValue(value.convert<UInt64>());
case ValueType::Int16: static_cast<ColumnInt16 &>(column).insertValue(value.convert<Int64>()); break; break;
case ValueType::Int32: static_cast<ColumnInt32 &>(column).insertValue(value.convert<Int64>()); break; case ValueType::UInt32:
case ValueType::Int64: static_cast<ColumnInt64 &>(column).insertValue(value.convert<Int64>()); break; static_cast<ColumnUInt32 &>(column).insertValue(value.convert<UInt64>());
case ValueType::Float32: static_cast<ColumnFloat32 &>(column).insertValue(value.convert<Float64>()); break; break;
case ValueType::Float64: static_cast<ColumnFloat64 &>(column).insertValue(value.convert<Float64>()); break; case ValueType::UInt64:
case ValueType::String: static_cast<ColumnString &>(column).insert(value.convert<String>()); break; static_cast<ColumnUInt64 &>(column).insertValue(value.convert<UInt64>());
case ValueType::Date: static_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{value.convert<String>()}.getDayNum()}); break; break;
case ValueType::DateTime: static_cast<ColumnUInt32 &>(column).insertValue(time_t{LocalDateTime{value.convert<String>()}}); break; case ValueType::Int8:
case ValueType::UUID: static_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.convert<std::string>())); break; static_cast<ColumnInt8 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::Int16:
static_cast<ColumnInt16 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::Int32:
static_cast<ColumnInt32 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::Int64:
static_cast<ColumnInt64 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::Float32:
static_cast<ColumnFloat32 &>(column).insertValue(value.convert<Float64>());
break;
case ValueType::Float64:
static_cast<ColumnFloat64 &>(column).insertValue(value.convert<Float64>());
break;
case ValueType::String:
static_cast<ColumnString &>(column).insert(value.convert<String>());
break;
case ValueType::Date:
static_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{value.convert<String>()}.getDayNum()});
break;
case ValueType::DateTime:
static_cast<ColumnUInt32 &>(column).insertValue(time_t{LocalDateTime{value.convert<String>()}});
break;
case ValueType::UUID:
static_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.convert<std::string>()));
break;
} }
} }
void insertDefaultValue(IColumn & column, const IColumn & sample_column) void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); }
{
column.insertFrom(sample_column, 0);
}
} }
@ -108,7 +130,6 @@ Block ODBCBlockInputStream::readImpl()
++num_rows; ++num_rows;
if (num_rows == max_block_size) if (num_rows == max_block_size)
break; break;
} }
return description.sample_block.cloneWithColumns(std::move(columns)); return description.sample_block.cloneWithColumns(std::move(columns));

View File

@ -1,24 +1,22 @@
#pragma once #pragma once
#include <string>
#include <Core/Block.h> #include <Core/Block.h>
#include <DataStreams/IProfilingBlockInputStream.h> #include <DataStreams/IProfilingBlockInputStream.h>
#include "ExternalResultDescription.h" #include <Poco/Data/RecordSet.h>
#include <Poco/Data/Session.h> #include <Poco/Data/Session.h>
#include <Poco/Data/Statement.h> #include <Poco/Data/Statement.h>
#include <Poco/Data/RecordSet.h> #include "ExternalResultDescription.h"
#include <string>
namespace DB namespace DB
{ {
/// Allows processing results of a query to ODBC source as a sequence of Blocks, simplifies chaining /// Allows processing results of a query to ODBC source as a sequence of Blocks, simplifies chaining
class ODBCBlockInputStream final : public IProfilingBlockInputStream class ODBCBlockInputStream final : public IProfilingBlockInputStream
{ {
public: public:
ODBCBlockInputStream( ODBCBlockInputStream(
Poco::Data::Session && session, const std::string & query_str, const Block & sample_block, Poco::Data::Session && session, const std::string & query_str, const Block & sample_block, const size_t max_block_size);
const size_t max_block_size);
String getName() const override { return "ODBC"; } String getName() const override { return "ODBC"; }

View File

@ -1,19 +1,18 @@
#pragma once #pragma once
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <DataStreams/IProfilingBlockInputStream.h> #include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h> #include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypesNumber.h>
#include <ext/range.h>
#include "DictionaryBlockInputStreamBase.h" #include "DictionaryBlockInputStreamBase.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "IDictionary.h" #include "IDictionary.h"
#include "RangeHashedDictionary.h" #include "RangeHashedDictionary.h"
#include <ext/range.h>
namespace DB namespace DB
{ {
/* /*
* BlockInputStream implementation for external dictionaries * BlockInputStream implementation for external dictionaries
* read() returns single block consisting of the in-memory contents of the dictionaries * read() returns single block consisting of the in-memory contents of the dictionaries
@ -25,46 +24,58 @@ public:
using DictionaryPtr = std::shared_ptr<DictionaryType const>; using DictionaryPtr = std::shared_ptr<DictionaryType const>;
RangeDictionaryBlockInputStream( RangeDictionaryBlockInputStream(
DictionaryPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids_to_fill, DictionaryPtr dictionary,
PaddedPODArray<RangeType> && start_dates, PaddedPODArray<RangeType> && end_dates); size_t max_block_size,
const Names & column_names,
PaddedPODArray<Key> && ids_to_fill,
PaddedPODArray<RangeType> && start_dates,
PaddedPODArray<RangeType> && end_dates);
String getName() const override String getName() const override { return "RangeDictionary"; }
{
return "RangeDictionary";
}
protected: protected:
Block getBlock(size_t start, size_t length) const override; Block getBlock(size_t start, size_t length) const override;
private: private:
template <typename Type> template <typename Type>
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, using DictionaryGetter = void (DictionaryType::*)(
const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const; const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
template <typename Type> template <typename Type>
using DictionaryDecimalGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, using DictionaryDecimalGetter = void (DictionaryType::*)(
const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const; const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
template <typename AttributeType, typename Getter> template <typename AttributeType, typename Getter>
ColumnPtr getColumnFromAttribute(Getter getter, ColumnPtr getColumnFromAttribute(
const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<Int64> & dates, Getter getter,
const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const; const PaddedPODArray<Key> & ids_to_fill,
ColumnPtr getColumnFromAttributeString(const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<Int64> & dates, const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const; const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const;
ColumnPtr getColumnFromAttributeString(
const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const;
template <typename T> template <typename T>
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const; ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
template <typename DictionarySpecialAttributeType, typename T> template <typename DictionarySpecialAttributeType, typename T>
void addSpecialColumn( void addSpecialColumn(
const std::optional<DictionarySpecialAttributeType> & attribute, DataTypePtr type, const std::optional<DictionarySpecialAttributeType> & attribute,
const std::string & default_name, const std::unordered_set<std::string> & column_names_set, DataTypePtr type,
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const; const std::string & default_name,
const std::unordered_set<std::string> & column_names_set,
const PaddedPODArray<T> & values,
ColumnsWithTypeAndName & columns) const;
Block fillBlock(const PaddedPODArray<Key> & ids_to_fill, Block fillBlock(
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const; const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const;
PaddedPODArray<Int64> makeDateKey( PaddedPODArray<Int64>
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const; makeDateKey(const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const;
DictionaryPtr dictionary; DictionaryPtr dictionary;
Names column_names; Names column_names;
@ -76,11 +87,18 @@ private:
template <typename DictionaryType, typename RangeType, typename Key> template <typename DictionaryType, typename RangeType, typename Key>
RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::RangeDictionaryBlockInputStream( RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::RangeDictionaryBlockInputStream(
DictionaryPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray<Key> && ids, DictionaryPtr dictionary,
PaddedPODArray<RangeType> && block_start_dates, PaddedPODArray<RangeType> && block_end_dates) size_t max_column_size,
: DictionaryBlockInputStreamBase(ids.size(), max_column_size), const Names & column_names,
dictionary(dictionary), column_names(column_names), PaddedPODArray<Key> && ids,
ids(std::move(ids)), start_dates(std::move(block_start_dates)), end_dates(std::move(block_end_dates)) PaddedPODArray<RangeType> && block_start_dates,
PaddedPODArray<RangeType> && block_end_dates)
: DictionaryBlockInputStreamBase(ids.size(), max_column_size)
, dictionary(dictionary)
, column_names(column_names)
, ids(std::move(ids))
, start_dates(std::move(block_start_dates))
, end_dates(std::move(block_end_dates))
{ {
} }
@ -107,8 +125,11 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
template <typename DictionaryType, typename RangeType, typename Key> template <typename DictionaryType, typename RangeType, typename Key>
template <typename AttributeType, typename Getter> template <typename AttributeType, typename Getter>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute( ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
Getter getter, const PaddedPODArray<Key> & ids_to_fill, Getter getter,
const PaddedPODArray<Int64> & dates, const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const
{ {
if constexpr (IsDecimalNumber<AttributeType>) if constexpr (IsDecimalNumber<AttributeType>)
{ {
@ -126,8 +147,10 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
template <typename DictionaryType, typename RangeType, typename Key> template <typename DictionaryType, typename RangeType, typename Key>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString( ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<Int64> & dates, const PaddedPODArray<Key> & ids_to_fill,
const DictionaryAttribute & attribute, const DictionaryType & concrete_dictionary) const const PaddedPODArray<Int64> & dates,
const DictionaryAttribute & attribute,
const DictionaryType & concrete_dictionary) const
{ {
auto column_string = ColumnString::create(); auto column_string = ColumnString::create();
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get()); concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
@ -149,9 +172,12 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
template <typename DictionaryType, typename RangeType, typename Key> template <typename DictionaryType, typename RangeType, typename Key>
template <typename DictionarySpecialAttributeType, typename T> template <typename DictionarySpecialAttributeType, typename T>
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn( void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
const std::optional<DictionarySpecialAttributeType> & attribute, DataTypePtr type, const std::optional<DictionarySpecialAttributeType> & attribute,
const std::string & default_name, const std::unordered_set<std::string> & column_names_set, DataTypePtr type,
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const const std::string & default_name,
const std::unordered_set<std::string> & column_names_set,
const PaddedPODArray<T> & values,
ColumnsWithTypeAndName & columns) const
{ {
std::string name = default_name; std::string name = default_name;
if (attribute) if (attribute)
@ -181,7 +207,8 @@ PaddedPODArray<Int64> RangeDictionaryBlockInputStream<DictionaryType, RangeType,
template <typename DictionaryType, typename RangeType, typename Key> template <typename DictionaryType, typename RangeType, typename Key>
Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock( Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock(
const PaddedPODArray<Key> & ids_to_fill, const PaddedPODArray<Key> & ids_to_fill,
const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const const PaddedPODArray<RangeType> & block_start_dates,
const PaddedPODArray<RangeType> & block_end_dates) const
{ {
ColumnsWithTypeAndName columns; ColumnsWithTypeAndName columns;
const DictionaryStructure & structure = dictionary->getStructure(); const DictionaryStructure & structure = dictionary->getStructure();

View File

@ -1,14 +1,13 @@
#include "RangeHashedDictionary.h" #include "RangeHashedDictionary.h"
#include "RangeDictionaryBlockInputStream.h"
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Functions/FunctionHelpers.h>
#include <Common/TypeList.h> #include <Common/TypeList.h>
#include <ext/range.h> #include <ext/range.h>
#include "DictionaryFactory.h" #include "DictionaryFactory.h"
#include "RangeDictionaryBlockInputStream.h"
namespace namespace
{ {
using RangeStorageType = DB::RangeHashedDictionary::RangeStorageType; using RangeStorageType = DB::RangeHashedDictionary::RangeStorageType;
// Null values mean that specified boundary, either min or max is not set on range. // Null values mean that specified boundary, either min or max is not set on range.
@ -44,7 +43,6 @@ const DB::IColumn & unwrapNullableColumn(const DB::IColumn & column)
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
@ -70,11 +68,16 @@ bool operator<(const RangeHashedDictionary::Range & left, const RangeHashedDicti
RangeHashedDictionary::RangeHashedDictionary( RangeHashedDictionary::RangeHashedDictionary(
const std::string & dictionary_name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const std::string & dictionary_name,
const DictionaryLifetime dict_lifetime, bool require_nonempty) const DictionaryStructure & dict_struct,
: dictionary_name{dictionary_name}, dict_struct(dict_struct), DictionarySourcePtr source_ptr,
source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), const DictionaryLifetime dict_lifetime,
require_nonempty(require_nonempty) bool require_nonempty)
: dictionary_name{dictionary_name}
, dict_struct(dict_struct)
, source_ptr{std::move(source_ptr)}
, dict_lifetime(dict_lifetime)
, require_nonempty(require_nonempty)
{ {
createAttributes(); createAttributes();
@ -92,14 +95,17 @@ RangeHashedDictionary::RangeHashedDictionary(
} }
RangeHashedDictionary::RangeHashedDictionary(const RangeHashedDictionary & other) RangeHashedDictionary::RangeHashedDictionary(const RangeHashedDictionary & other)
: RangeHashedDictionary{other.dictionary_name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty} : RangeHashedDictionary{
other.dictionary_name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty}
{ {
} }
#define DECLARE_MULTIPLE_GETTER(TYPE) \ #define DECLARE_MULTIPLE_GETTER(TYPE) \
void RangeHashedDictionary::get##TYPE( \ void RangeHashedDictionary::get##TYPE( \
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<RangeStorageType> & dates,\ const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<RangeStorageType> & dates, \
ResultArrayType<TYPE> & out) const \ ResultArrayType<TYPE> & out) const \
{ \ { \
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::TYPE); \ const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::TYPE); \
@ -122,7 +128,9 @@ DECLARE_MULTIPLE_GETTER(Decimal128)
#undef DECLARE_MULTIPLE_GETTER #undef DECLARE_MULTIPLE_GETTER
void RangeHashedDictionary::getString( void RangeHashedDictionary::getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<RangeStorageType> & dates, const std::string & attribute_name,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
ColumnString * out) const ColumnString * out) const
{ {
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::String); const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::String);
@ -136,8 +144,11 @@ void RangeHashedDictionary::getString(
{ {
const auto date = dates[i]; const auto date = dates[i];
const auto & ranges_and_values = it->second; const auto & ranges_and_values = it->second;
const auto val_it = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), const auto val_it
[date] (const Value<StringRef> & v) { return v.range.contains(date); }); = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{
return v.range.contains(date);
});
const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value}; const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
out->insertData(string_ref.data, string_ref.size); out->insertData(string_ref.data, string_ref.size);
@ -161,7 +172,8 @@ void RangeHashedDictionary::createAttributes()
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
throw Exception{dictionary_name + ": hierarchical attributes not supported by " + getName() + " dictionary.", ErrorCodes::BAD_ARGUMENTS}; throw Exception{dictionary_name + ": hierarchical attributes not supported by " + getName() + " dictionary.",
ErrorCodes::BAD_ARGUMENTS};
} }
} }
@ -203,9 +215,7 @@ void RangeHashedDictionary::loadData()
upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, RANGE_MAX_NULL_VALUE); upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, RANGE_MAX_NULL_VALUE);
} }
setAttributeValue(attribute, id_column.getUInt(row_idx), setAttributeValue(attribute, id_column.getUInt(row_idx), Range{lower_bound, upper_bound}, attribute_column[row_idx]);
Range{lower_bound, upper_bound},
attribute_column[row_idx]);
} }
} }
} }
@ -213,7 +223,8 @@ void RangeHashedDictionary::loadData()
stream->readSuffix(); stream->readSuffix();
if (require_nonempty && 0 == element_count) if (require_nonempty && 0 == element_count)
throw Exception{dictionary_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY}; throw Exception{dictionary_name + ": dictionary source is empty and 'require_nonempty' property is set.",
ErrorCodes::DICTIONARY_IS_EMPTY};
} }
template <typename T> template <typename T>
@ -232,21 +243,49 @@ void RangeHashedDictionary::calculateBytesAllocated()
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: addAttributeSize<UInt8>(attribute); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: addAttributeSize<UInt16>(attribute); break; addAttributeSize<UInt8>(attribute);
case AttributeUnderlyingType::UInt32: addAttributeSize<UInt32>(attribute); break; break;
case AttributeUnderlyingType::UInt64: addAttributeSize<UInt64>(attribute); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: addAttributeSize<UInt128>(attribute); break; addAttributeSize<UInt16>(attribute);
case AttributeUnderlyingType::Int8: addAttributeSize<Int8>(attribute); break; break;
case AttributeUnderlyingType::Int16: addAttributeSize<Int16>(attribute); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: addAttributeSize<Int32>(attribute); break; addAttributeSize<UInt32>(attribute);
case AttributeUnderlyingType::Int64: addAttributeSize<Int64>(attribute); break; break;
case AttributeUnderlyingType::Float32: addAttributeSize<Float32>(attribute); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: addAttributeSize<Float64>(attribute); break; addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::UInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::Int8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::Int16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::Int32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::Int64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::Float32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::Float64:
addAttributeSize<Float64>(attribute);
break;
case AttributeUnderlyingType::Decimal32: addAttributeSize<Decimal32>(attribute); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: addAttributeSize<Decimal64>(attribute); break; addAttributeSize<Decimal32>(attribute);
case AttributeUnderlyingType::Decimal128: addAttributeSize<Decimal128>(attribute); break; break;
case AttributeUnderlyingType::Decimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::Decimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -266,27 +305,56 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie
attribute.maps = std::make_unique<Collection<T>>(); attribute.maps = std::make_unique<Collection<T>>();
} }
RangeHashedDictionary::Attribute RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) RangeHashedDictionary::Attribute
RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{ {
Attribute attr{type, {}, {}, {}}; Attribute attr{type, {}, {}, {}};
switch (type) switch (type)
{ {
case AttributeUnderlyingType::UInt8: createAttributeImpl<UInt8>(attr, null_value); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: createAttributeImpl<UInt16>(attr, null_value); break; createAttributeImpl<UInt8>(attr, null_value);
case AttributeUnderlyingType::UInt32: createAttributeImpl<UInt32>(attr, null_value); break; break;
case AttributeUnderlyingType::UInt64: createAttributeImpl<UInt64>(attr, null_value); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: createAttributeImpl<UInt128>(attr, null_value); break; createAttributeImpl<UInt16>(attr, null_value);
case AttributeUnderlyingType::Int8: createAttributeImpl<Int8>(attr, null_value); break; break;
case AttributeUnderlyingType::Int16: createAttributeImpl<Int16>(attr, null_value); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: createAttributeImpl<Int32>(attr, null_value); break; createAttributeImpl<UInt32>(attr, null_value);
case AttributeUnderlyingType::Int64: createAttributeImpl<Int64>(attr, null_value); break; break;
case AttributeUnderlyingType::Float32: createAttributeImpl<Float32>(attr, null_value); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: createAttributeImpl<Float64>(attr, null_value); break; createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::UInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::Int8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::Int16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::Int32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::Int64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::Float32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::Float64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal32: createAttributeImpl<Decimal32>(attr, null_value); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: createAttributeImpl<Decimal64>(attr, null_value); break; createAttributeImpl<Decimal32>(attr, null_value);
case AttributeUnderlyingType::Decimal128: createAttributeImpl<Decimal128>(attr, null_value); break; break;
case AttributeUnderlyingType::Decimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -308,10 +376,10 @@ void RangeHashedDictionary::getItems(
const PaddedPODArray<RangeStorageType> & dates, const PaddedPODArray<RangeStorageType> & dates,
PaddedPODArray<OutputType> & out) const PaddedPODArray<OutputType> & out) const
{ {
if (false) {} if (false)
#define DISPATCH(TYPE) \ {
else if (attribute.type == AttributeUnderlyingType::TYPE) \ }
getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out); #define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
DISPATCH(UInt8) DISPATCH(UInt8)
DISPATCH(UInt16) DISPATCH(UInt16)
DISPATCH(UInt32) DISPATCH(UInt32)
@ -327,8 +395,7 @@ void RangeHashedDictionary::getItems(
DISPATCH(Decimal64) DISPATCH(Decimal64)
DISPATCH(Decimal128) DISPATCH(Decimal128)
#undef DISPATCH #undef DISPATCH
else else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
} }
template <typename AttributeType, typename OutputType> template <typename AttributeType, typename OutputType>
@ -348,8 +415,11 @@ void RangeHashedDictionary::getItemsImpl(
{ {
const auto date = dates[i]; const auto date = dates[i];
const auto & ranges_and_values = it->second; const auto & ranges_and_values = it->second;
const auto val_it = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), const auto val_it
[date] (const Value<AttributeType> & v) { return v.range.contains(date); }); = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{
return v.range.contains(date);
});
out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value); out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value);
} }
@ -373,8 +443,8 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
{ {
auto & values = it->second; auto & values = it->second;
const auto insert_it = std::lower_bound(std::begin(values), std::end(values), range, const auto insert_it
[] (const Value<T> & lhs, const Range & rhs_range) = std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
{ {
return lhs.range < rhs_range; return lhs.range < rhs_range;
}); });
@ -389,17 +459,39 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>()); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>()); break; setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>());
case AttributeUnderlyingType::UInt32: setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>()); break; break;
case AttributeUnderlyingType::UInt64: setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>()); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>()); break; setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>());
case AttributeUnderlyingType::Int8: setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>()); break; break;
case AttributeUnderlyingType::Int16: setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>()); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>()); break; setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>());
case AttributeUnderlyingType::Int64: setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>()); break; break;
case AttributeUnderlyingType::Float32: setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>()); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>()); break; setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>());
break;
case AttributeUnderlyingType::UInt128:
setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>());
break;
case AttributeUnderlyingType::Int8:
setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::Int16:
setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::Int32:
setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::Int64:
setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>());
break;
case AttributeUnderlyingType::Float32:
setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>());
break;
case AttributeUnderlyingType::Float64:
setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>());
break;
case AttributeUnderlyingType::Decimal32: case AttributeUnderlyingType::Decimal32:
setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>()); setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>());
@ -424,8 +516,8 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i
{ {
auto & values = it->second; auto & values = it->second;
const auto insert_it = std::lower_bound(std::begin(values), std::end(values), range, const auto insert_it = std::lower_bound(
[] (const Value<StringRef> & lhs, const Range & rhs_range) std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
{ {
return lhs.range < rhs_range; return lhs.range < rhs_range;
}); });
@ -449,45 +541,78 @@ const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(con
return attributes[it->second]; return attributes[it->second];
} }
const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, const AttributeUnderlyingType type) const const RangeHashedDictionary::Attribute &
RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, const AttributeUnderlyingType type) const
{ {
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (attribute.type != type) if (attribute.type != type)
throw Exception{attribute_name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{attribute_name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
return attribute; return attribute;
} }
template <typename RangeType> template <typename RangeType>
void RangeHashedDictionary::getIdsAndDates(PaddedPODArray<Key> & ids, void RangeHashedDictionary::getIdsAndDates(
PaddedPODArray<RangeType> & start_dates, PaddedPODArray<Key> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const
PaddedPODArray<RangeType> & end_dates) const
{ {
const auto & attribute = attributes.front(); const auto & attribute = attributes.front();
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates); break; getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates);
case AttributeUnderlyingType::UInt32: getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates); break; break;
case AttributeUnderlyingType::UInt64: getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates); break; getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates);
case AttributeUnderlyingType::Int8: getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates); break; break;
case AttributeUnderlyingType::Int16: getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates); break; getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates);
case AttributeUnderlyingType::Int64: getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates); break; break;
case AttributeUnderlyingType::Float32: getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates); break; getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates);
case AttributeUnderlyingType::String: getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates); break; break;
case AttributeUnderlyingType::UInt128:
getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Int8:
getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Int16:
getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Int32:
getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Int64:
getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Float32:
getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Float64:
getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::String:
getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Decimal32: getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates); break; getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates);
case AttributeUnderlyingType::Decimal128: getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates); break; break;
case AttributeUnderlyingType::Decimal64:
getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates);
break;
case AttributeUnderlyingType::Decimal128:
getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates);
break;
} }
} }
template <typename T, typename RangeType> template <typename T, typename RangeType>
void RangeHashedDictionary::getIdsAndDates(const Attribute & attribute, PaddedPODArray<Key> & ids, void RangeHashedDictionary::getIdsAndDates(
const Attribute & attribute,
PaddedPODArray<Key> & ids,
PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & start_dates,
PaddedPODArray<RangeType> & end_dates) const PaddedPODArray<RangeType> & end_dates) const
{ {
@ -556,8 +681,8 @@ BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & col
ListType::forEach(callable); ListType::forEach(callable);
if (!callable.stream) if (!callable.stream)
throw Exception("Unexpected range type for RangeHashed dictionary: " + dict_struct.range_min->type->getName(), throw Exception(
ErrorCodes::LOGICAL_ERROR); "Unexpected range type for RangeHashed dictionary: " + dict_struct.range_min->type->getName(), ErrorCodes::LOGICAL_ERROR);
return callable.stream; return callable.stream;
} }
@ -565,13 +690,12 @@ BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & col
void registerDictionaryRangeHashed(DictionaryFactory & factory) void registerDictionaryRangeHashed(DictionaryFactory & factory)
{ {
auto create_layout = [=]( auto create_layout = [=](const std::string & name,
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
DictionarySourcePtr source_ptr DictionarySourcePtr source_ptr) -> DictionaryPtr
) -> DictionaryPtr { {
if (dict_struct.key) if (dict_struct.key)
throw Exception{"'key' is not supported for dictionary of layout 'range_hashed'", ErrorCodes::UNSUPPORTED_METHOD}; throw Exception{"'key' is not supported for dictionary of layout 'range_hashed'", ErrorCodes::UNSUPPORTED_METHOD};

View File

@ -1,11 +1,11 @@
#pragma once #pragma once
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnDecimal.h> #include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Common/HashTable/HashMap.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <atomic> #include <atomic>
#include <memory> #include <memory>
@ -14,13 +14,15 @@
namespace DB namespace DB
{ {
class RangeHashedDictionary final : public IDictionaryBase class RangeHashedDictionary final : public IDictionaryBase
{ {
public: public:
RangeHashedDictionary( RangeHashedDictionary(
const std::string & dictionary_name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const std::string & dictionary_name,
const DictionaryLifetime dict_lifetime, bool require_nonempty); const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
bool require_nonempty);
RangeHashedDictionary(const RangeHashedDictionary & other); RangeHashedDictionary(const RangeHashedDictionary & other);
@ -50,10 +52,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; } const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
@ -88,7 +87,9 @@ public:
#undef DECLARE_MULTIPLE_GETTER #undef DECLARE_MULTIPLE_GETTER
void getString( void getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<RangeStorageType> & dates, const std::string & attribute_name,
const PaddedPODArray<Key> & ids,
const PaddedPODArray<RangeStorageType> & dates,
ColumnString * out) const; ColumnString * out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
@ -110,25 +111,51 @@ private:
T value; T value;
}; };
template <typename T> using Values = std::vector<Value<T>>; template <typename T>
template <typename T> using Collection = HashMap<UInt64, Values<T>>; using Values = std::vector<Value<T>>;
template <typename T> using Ptr = std::unique_ptr<Collection<T>>; template <typename T>
using Collection = HashMap<UInt64, Values<T>>;
template <typename T>
using Ptr = std::unique_ptr<Collection<T>>;
struct Attribute final struct Attribute final
{ {
public: public:
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::variant<UInt8, UInt16, UInt32, UInt64, std::variant<
UInt8,
UInt16,
UInt32,
UInt64,
UInt128, UInt128,
Int8, Int16, Int32, Int64, Int8,
Decimal32, Decimal64, Decimal128, Int16,
Float32, Float64, Int32,
String> null_values; Int64,
std::variant<Ptr<UInt8>, Ptr<UInt16>, Ptr<UInt32>, Ptr<UInt64>, Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant<
Ptr<UInt8>,
Ptr<UInt16>,
Ptr<UInt32>,
Ptr<UInt64>,
Ptr<UInt128>, Ptr<UInt128>,
Ptr<Int8>, Ptr<Int16>, Ptr<Int32>, Ptr<Int64>, Ptr<Int8>,
Ptr<Decimal32>, Ptr<Decimal64>, Ptr<Decimal128>, Ptr<Int16>,
Ptr<Float32>, Ptr<Float64>, Ptr<StringRef>> maps; Ptr<Int32>,
Ptr<Int64>,
Ptr<Decimal32>,
Ptr<Decimal64>,
Ptr<Decimal128>,
Ptr<Float32>,
Ptr<Float64>,
Ptr<StringRef>>
maps;
std::unique_ptr<Arena> string_arena; std::unique_ptr<Arena> string_arena;
}; };
@ -172,12 +199,14 @@ private:
const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const; const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
template <typename RangeType> template <typename RangeType>
void getIdsAndDates(PaddedPODArray<Key> & ids, void getIdsAndDates(PaddedPODArray<Key> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const;
PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const;
template <typename T, typename RangeType> template <typename T, typename RangeType>
void getIdsAndDates(const Attribute & attribute, PaddedPODArray<Key> & ids, void getIdsAndDates(
PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const; const Attribute & attribute,
PaddedPODArray<Key> & ids,
PaddedPODArray<RangeType> & start_dates,
PaddedPODArray<RangeType> & end_dates) const;
template <typename RangeType> template <typename RangeType>
BlockInputStreamPtr getBlockInputStreamImpl(const Names & column_names, size_t max_block_size) const; BlockInputStreamPtr getBlockInputStreamImpl(const Names & column_names, size_t max_block_size) const;

View File

@ -1,24 +1,23 @@
#include <stack>
#include <ext/map.h>
#include <ext/range.h>
#include <Poco/Net/IPAddress.h>
#include <Poco/ByteOrder.h>
#include "TrieDictionary.h" #include "TrieDictionary.h"
#include <Columns/ColumnVector.h> #include <iostream>
#include <stack>
#include <btrie.h>
#include <Columns/ColumnFixedString.h> #include <Columns/ColumnFixedString.h>
#include "DictionaryBlockInputStream.h" #include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeFixedString.h> #include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <IO/WriteIntText.h> #include <IO/WriteIntText.h>
#include <Poco/ByteOrder.h>
#include <Poco/Net/IPAddress.h>
#include <Common/formatIPv6.h> #include <Common/formatIPv6.h>
#include <iostream> #include <ext/map.h>
#include <btrie.h> #include <ext/range.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h" #include "DictionaryFactory.h"
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
@ -29,10 +28,17 @@ namespace ErrorCodes
} }
TrieDictionary::TrieDictionary( TrieDictionary::TrieDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const std::string & name,
const DictionaryLifetime dict_lifetime, bool require_nonempty) const DictionaryStructure & dict_struct,
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), DictionarySourcePtr source_ptr,
require_nonempty(require_nonempty), logger(&Poco::Logger::get("TrieDictionary")) const DictionaryLifetime dict_lifetime,
bool require_nonempty)
: name{name}
, dict_struct(dict_struct)
, source_ptr{std::move(source_ptr)}
, dict_lifetime(dict_lifetime)
, require_nonempty(require_nonempty)
, logger(&Poco::Logger::get("TrieDictionary"))
{ {
createAttributes(); createAttributes();
trie = btrie_create(); trie = btrie_create();
@ -62,18 +68,20 @@ TrieDictionary::~TrieDictionary()
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void TrieDictionary::get##TYPE( \ void TrieDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
ResultArrayType<TYPE> & out) const\
{ \ { \
validateKeyTypes(key_types); \ validateKeyTypes(key_types); \
\ \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
const auto null_value = std::get<TYPE>(attribute.null_values); \ const auto null_value = std::get<TYPE>(attribute.null_values); \
\ \
getItemsNumber<TYPE>(attribute, key_columns,\ getItemsNumber<TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \ [&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t) { return null_value; }); \ [&](const size_t) { return null_value; }); \
} }
@ -94,34 +102,42 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void TrieDictionary::getString( void TrieDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
ColumnString * out) const
{ {
validateKeyTypes(key_types); validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
const auto & null_value = StringRef{std::get<String>(attribute.null_values)}; const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(attribute, key_columns, getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return null_value; }); [&](const size_t) { return null_value; });
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void TrieDictionary::get##TYPE( \ void TrieDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, \
const PaddedPODArray<TYPE> & def, ResultArrayType<TYPE> & out) const\ const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const \
{ \ { \
validateKeyTypes(key_types); \ validateKeyTypes(key_types); \
\ \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, key_columns,\ getItemsNumber<TYPE>( \
attribute, \
key_columns, \
[&](const size_t row, const auto value) { out[row] = value; }, \ [&](const size_t row, const auto value) { out[row] = value; }, \
[&](const size_t row) { return def[row]; }); \ [&](const size_t row) { return def[row]; }); \
} }
@ -142,34 +158,43 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void TrieDictionary::getString( void TrieDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const ColumnString * const def, ColumnString * const out) const const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
{ {
validateKeyTypes(key_types); validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, key_columns, getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t row) { return def->getDataAt(row); }); [&](const size_t row) { return def->getDataAt(row); });
} }
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void TrieDictionary::get##TYPE( \ void TrieDictionary::get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, \
const TYPE def, ResultArrayType<TYPE> & out) const\ const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const \
{ \ { \
validateKeyTypes(key_types); \ validateKeyTypes(key_types); \
\ \
const auto & attribute = getAttribute(attribute_name); \ const auto & attribute = getAttribute(attribute_name); \
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \
ErrorCodes::TYPE_MISMATCH}; \
\ \
getItemsNumber<TYPE>(attribute, key_columns,\ getItemsNumber<TYPE>( \
[&] (const size_t row, const auto value) { out[row] = value; },\ attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
[&] (const size_t) { return def; });\
} }
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -188,16 +213,22 @@ DECLARE(Decimal128)
#undef DECLARE #undef DECLARE
void TrieDictionary::getString( void TrieDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const String & def, ColumnString * const out) const const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{ {
validateKeyTypes(key_types); validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name); const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, key_columns, getItemsImpl<StringRef, StringRef>(
attribute,
key_columns,
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
[&](const size_t) { return StringRef{def}; }); [&](const size_t) { return StringRef{def}; });
} }
@ -210,22 +241,52 @@ void TrieDictionary::has(const Columns & key_columns, const DataTypes & key_type
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: has<UInt8>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: has<UInt16>(attribute, key_columns, out); break; has<UInt8>(attribute, key_columns, out);
case AttributeUnderlyingType::UInt32: has<UInt32>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::UInt64: has<UInt64>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: has<UInt128>(attribute, key_columns, out); break; has<UInt16>(attribute, key_columns, out);
case AttributeUnderlyingType::Int8: has<Int8>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::Int16: has<Int16>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: has<Int32>(attribute, key_columns, out); break; has<UInt32>(attribute, key_columns, out);
case AttributeUnderlyingType::Int64: has<Int64>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::Float32: has<Float32>(attribute, key_columns, out); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: has<Float64>(attribute, key_columns, out); break; has<UInt64>(attribute, key_columns, out);
case AttributeUnderlyingType::String: has<StringRef>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::UInt128:
has<UInt128>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int8:
has<Int8>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int16:
has<Int16>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int32:
has<Int32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Int64:
has<Int64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Float32:
has<Float32>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Float64:
has<Float64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::String:
has<StringRef>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Decimal32: has<Decimal32>(attribute, key_columns, out); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: has<Decimal64>(attribute, key_columns, out); break; has<Decimal32>(attribute, key_columns, out);
case AttributeUnderlyingType::Decimal128: has<Decimal128>(attribute, key_columns, out); break; break;
case AttributeUnderlyingType::Decimal64:
has<Decimal64>(attribute, key_columns, out);
break;
case AttributeUnderlyingType::Decimal128:
has<Decimal128>(attribute, key_columns, out);
break;
} }
} }
@ -240,7 +301,8 @@ void TrieDictionary::createAttributes()
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
if (attribute.hierarchical) if (attribute.hierarchical)
throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
ErrorCodes::TYPE_MISMATCH};
} }
} }
@ -260,14 +322,10 @@ void TrieDictionary::loadData()
const auto rows = block.rows(); const auto rows = block.rows();
element_count += rows; element_count += rows;
const auto key_column_ptrs = ext::map<Columns>(ext::range(0, keys_size), const auto key_column_ptrs = ext::map<Columns>(
[&] (const size_t attribute_idx) ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
{
return block.safeGetByPosition(attribute_idx).column;
});
const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
[&] (const size_t attribute_idx)
{ {
return block.safeGetByPosition(keys_size + attribute_idx).column; return block.safeGetByPosition(keys_size + attribute_idx).column;
}); });
@ -284,7 +342,6 @@ void TrieDictionary::loadData()
setAttributeValue(attribute, key_column->getDataAt(row_idx), attribute_column[row_idx]); setAttributeValue(attribute, key_column->getDataAt(row_idx), attribute_column[row_idx]);
} }
} }
} }
stream->readSuffix(); stream->readSuffix();
@ -309,21 +366,49 @@ void TrieDictionary::calculateBytesAllocated()
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: addAttributeSize<UInt8>(attribute); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: addAttributeSize<UInt16>(attribute); break; addAttributeSize<UInt8>(attribute);
case AttributeUnderlyingType::UInt32: addAttributeSize<UInt32>(attribute); break; break;
case AttributeUnderlyingType::UInt64: addAttributeSize<UInt64>(attribute); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: addAttributeSize<UInt128>(attribute); break; addAttributeSize<UInt16>(attribute);
case AttributeUnderlyingType::Int8: addAttributeSize<Int8>(attribute); break; break;
case AttributeUnderlyingType::Int16: addAttributeSize<Int16>(attribute); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: addAttributeSize<Int32>(attribute); break; addAttributeSize<UInt32>(attribute);
case AttributeUnderlyingType::Int64: addAttributeSize<Int64>(attribute); break; break;
case AttributeUnderlyingType::Float32: addAttributeSize<Float32>(attribute); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: addAttributeSize<Float64>(attribute); break; addAttributeSize<UInt64>(attribute);
break;
case AttributeUnderlyingType::UInt128:
addAttributeSize<UInt128>(attribute);
break;
case AttributeUnderlyingType::Int8:
addAttributeSize<Int8>(attribute);
break;
case AttributeUnderlyingType::Int16:
addAttributeSize<Int16>(attribute);
break;
case AttributeUnderlyingType::Int32:
addAttributeSize<Int32>(attribute);
break;
case AttributeUnderlyingType::Int64:
addAttributeSize<Int64>(attribute);
break;
case AttributeUnderlyingType::Float32:
addAttributeSize<Float32>(attribute);
break;
case AttributeUnderlyingType::Float64:
addAttributeSize<Float64>(attribute);
break;
case AttributeUnderlyingType::Decimal32: addAttributeSize<Decimal32>(attribute); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: addAttributeSize<Decimal64>(attribute); break; addAttributeSize<Decimal32>(attribute);
case AttributeUnderlyingType::Decimal128: addAttributeSize<Decimal128>(attribute); break; break;
case AttributeUnderlyingType::Decimal64:
addAttributeSize<Decimal64>(attribute);
break;
case AttributeUnderlyingType::Decimal128:
addAttributeSize<Decimal128>(attribute);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -363,21 +448,49 @@ TrieDictionary::Attribute TrieDictionary::createAttributeWithType(const Attribut
switch (type) switch (type)
{ {
case AttributeUnderlyingType::UInt8: createAttributeImpl<UInt8>(attr, null_value); break; case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: createAttributeImpl<UInt16>(attr, null_value); break; createAttributeImpl<UInt8>(attr, null_value);
case AttributeUnderlyingType::UInt32: createAttributeImpl<UInt32>(attr, null_value); break; break;
case AttributeUnderlyingType::UInt64: createAttributeImpl<UInt64>(attr, null_value); break; case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt128: createAttributeImpl<UInt128>(attr, null_value); break; createAttributeImpl<UInt16>(attr, null_value);
case AttributeUnderlyingType::Int8: createAttributeImpl<Int8>(attr, null_value); break; break;
case AttributeUnderlyingType::Int16: createAttributeImpl<Int16>(attr, null_value); break; case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int32: createAttributeImpl<Int32>(attr, null_value); break; createAttributeImpl<UInt32>(attr, null_value);
case AttributeUnderlyingType::Int64: createAttributeImpl<Int64>(attr, null_value); break; break;
case AttributeUnderlyingType::Float32: createAttributeImpl<Float32>(attr, null_value); break; case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Float64: createAttributeImpl<Float64>(attr, null_value); break; createAttributeImpl<UInt64>(attr, null_value);
break;
case AttributeUnderlyingType::UInt128:
createAttributeImpl<UInt128>(attr, null_value);
break;
case AttributeUnderlyingType::Int8:
createAttributeImpl<Int8>(attr, null_value);
break;
case AttributeUnderlyingType::Int16:
createAttributeImpl<Int16>(attr, null_value);
break;
case AttributeUnderlyingType::Int32:
createAttributeImpl<Int32>(attr, null_value);
break;
case AttributeUnderlyingType::Int64:
createAttributeImpl<Int64>(attr, null_value);
break;
case AttributeUnderlyingType::Float32:
createAttributeImpl<Float32>(attr, null_value);
break;
case AttributeUnderlyingType::Float64:
createAttributeImpl<Float64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal32: createAttributeImpl<Decimal32>(attr, null_value); break; case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: createAttributeImpl<Decimal64>(attr, null_value); break; createAttributeImpl<Decimal32>(attr, null_value);
case AttributeUnderlyingType::Decimal128: createAttributeImpl<Decimal128>(attr, null_value); break; break;
case AttributeUnderlyingType::Decimal64:
createAttributeImpl<Decimal64>(attr, null_value);
break;
case AttributeUnderlyingType::Decimal128:
createAttributeImpl<Decimal128>(attr, null_value);
break;
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -394,15 +507,14 @@ TrieDictionary::Attribute TrieDictionary::createAttributeWithType(const Attribut
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void TrieDictionary::getItemsNumber( void TrieDictionary::getItemsNumber(
const Attribute & attribute, const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
if (false) {} if (false)
{
}
#define DISPATCH(TYPE) \ #define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) getItemsImpl<TYPE, OutputType>( \
getItemsImpl<TYPE, OutputType>(attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default)); attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
DISPATCH(UInt8) DISPATCH(UInt8)
DISPATCH(UInt16) DISPATCH(UInt16)
DISPATCH(UInt32) DISPATCH(UInt32)
@ -418,16 +530,12 @@ void TrieDictionary::getItemsNumber(
DISPATCH(Decimal64) DISPATCH(Decimal64)
DISPATCH(Decimal128) DISPATCH(Decimal128)
#undef DISPATCH #undef DISPATCH
else else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
} }
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void TrieDictionary::getItemsImpl( void TrieDictionary::getItemsImpl(
const Attribute & attribute, const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const
{ {
auto & vec = std::get<ContainerType<AttributeType>>(attribute.maps); auto & vec = std::get<ContainerType<AttributeType>>(attribute.maps);
@ -473,7 +581,6 @@ bool TrieDictionary::setAttributeValueImpl(Attribute & attribute, const StringRe
size_t pos = addr_str.find('/'); size_t pos = addr_str.find('/');
if (pos != std::string::npos) if (pos != std::string::npos)
{ {
addr = Poco::Net::IPAddress(addr_str.substr(0, pos)); addr = Poco::Net::IPAddress(addr_str.substr(0, pos));
mask = Poco::Net::IPAddress(std::stoi(addr_str.substr(pos + 1), nullptr, 10), addr.family()); mask = Poco::Net::IPAddress(std::stoi(addr_str.substr(pos + 1), nullptr, 10), addr.family());
} }
@ -506,21 +613,35 @@ bool TrieDictionary::setAttributeValue(Attribute & attribute, const StringRef ke
{ {
switch (attribute.type) switch (attribute.type)
{ {
case AttributeUnderlyingType::UInt8: return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>()); case AttributeUnderlyingType::UInt8:
case AttributeUnderlyingType::UInt16: return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>()); return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::UInt32: return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>()); case AttributeUnderlyingType::UInt16:
case AttributeUnderlyingType::UInt64: return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>()); return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::UInt128: return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>()); case AttributeUnderlyingType::UInt32:
case AttributeUnderlyingType::Int8: return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>()); return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::Int16: return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>()); case AttributeUnderlyingType::UInt64:
case AttributeUnderlyingType::Int32: return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>()); return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::Int64: return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>()); case AttributeUnderlyingType::UInt128:
case AttributeUnderlyingType::Float32: return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>()); return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
case AttributeUnderlyingType::Float64: return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>()); case AttributeUnderlyingType::Int8:
return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int16:
return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int32:
return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int64:
return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Float32:
return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::Float64:
return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::Decimal32: return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>()); case AttributeUnderlyingType::Decimal32:
case AttributeUnderlyingType::Decimal64: return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>()); return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
case AttributeUnderlyingType::Decimal128: return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>()); case AttributeUnderlyingType::Decimal64:
return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
case AttributeUnderlyingType::Decimal128:
return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
case AttributeUnderlyingType::String: case AttributeUnderlyingType::String:
{ {
@ -642,8 +763,8 @@ BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_nam
auto getKeys = [](const Columns & columns, const std::vector<DictionaryAttribute> & attributes) auto getKeys = [](const Columns & columns, const std::vector<DictionaryAttribute> & attributes)
{ {
const auto & attr = attributes.front(); const auto & attr = attributes.front();
return ColumnsWithTypeAndName({ColumnWithTypeAndName(columns.front(), return ColumnsWithTypeAndName(
std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), attr.name)}); {ColumnWithTypeAndName(columns.front(), std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), attr.name)});
}; };
auto getView = [](const Columns & columns, const std::vector<DictionaryAttribute> & attributes) auto getView = [](const Columns & columns, const std::vector<DictionaryAttribute> & attributes)
{ {
@ -660,22 +781,22 @@ BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_nam
auto size = detail::writeUIntText(mask, ptr); auto size = detail::writeUIntText(mask, ptr);
column->insertData(buffer, size + (ptr - buffer)); column->insertData(buffer, size + (ptr - buffer));
} }
return ColumnsWithTypeAndName{ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), attributes.front().name)}; return ColumnsWithTypeAndName{
ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), attributes.front().name)};
}; };
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeyColumns(), column_names, return std::make_shared<BlockInputStreamType>(
std::move(getKeys), std::move(getView)); shared_from_this(), max_block_size, getKeyColumns(), column_names, std::move(getKeys), std::move(getView));
} }
void registerDictionaryTrie(DictionaryFactory & factory) void registerDictionaryTrie(DictionaryFactory & factory)
{ {
auto create_layout = [=]( auto create_layout = [=](const std::string & name,
const std::string & name,
const DictionaryStructure & dict_struct, const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix, const std::string & config_prefix,
DictionarySourcePtr source_ptr DictionarySourcePtr source_ptr) -> DictionaryPtr
) -> DictionaryPtr { {
if (!dict_struct.key) if (!dict_struct.key)
throw Exception{"'key' is required for dictionary of layout 'ip_trie'", ErrorCodes::BAD_ARGUMENTS}; throw Exception{"'key' is required for dictionary of layout 'ip_trie'", ErrorCodes::BAD_ARGUMENTS};

View File

@ -1,31 +1,33 @@
#pragma once #pragma once
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <common/StringRef.h>
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <ext/range.h>
#include <atomic> #include <atomic>
#include <memory> #include <memory>
#include <variant> #include <variant>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Common/HashTable/HashMap.h>
#include <common/StringRef.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <ext/range.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
struct btrie_s; struct btrie_s;
typedef struct btrie_s btrie_t; typedef struct btrie_s btrie_t;
namespace DB namespace DB
{ {
class TrieDictionary final : public IDictionaryBase class TrieDictionary final : public IDictionaryBase
{ {
public: public:
TrieDictionary( TrieDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const std::string & name,
const DictionaryLifetime dict_lifetime, bool require_nonempty); const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
bool require_nonempty);
TrieDictionary(const TrieDictionary & other); TrieDictionary(const TrieDictionary & other);
@ -59,10 +61,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; } const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override bool isInjective(const std::string & attribute_name) const override
{ {
@ -74,7 +73,31 @@ public:
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE( \ void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const; ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
@ -93,13 +116,19 @@ public:
#undef DECLARE #undef DECLARE
void getString( void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
ColumnString * out) const; const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
#define DECLARE(TYPE) \ #define DECLARE(TYPE) \
void get##TYPE( \ void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ const std::string & attribute_name, \
const PaddedPODArray<TYPE> & def, ResultArrayType<TYPE> & out) const; const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8) DECLARE(UInt8)
DECLARE(UInt16) DECLARE(UInt16)
DECLARE(UInt32) DECLARE(UInt32)
@ -117,57 +146,57 @@ public:
#undef DECLARE #undef DECLARE
void getString( void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const std::string & attribute_name,
const ColumnString * const def, ColumnString * const out) const; const Columns & key_columns,
const DataTypes & key_types,
#define DECLARE(TYPE)\ const String & def,
void get##TYPE(\ ColumnString * const out) const;
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\
const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(UInt128)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
DECLARE(Decimal32)
DECLARE(Decimal64)
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
const String & def, ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const; void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private: private:
template <typename Value> using ContainerType = std::vector<Value>; template <typename Value>
using ContainerType = std::vector<Value>;
struct Attribute final struct Attribute final
{ {
AttributeUnderlyingType type; AttributeUnderlyingType type;
std::variant< std::variant<
UInt8, UInt16, UInt32, UInt64, UInt8,
UInt16,
UInt32,
UInt64,
UInt128, UInt128,
Int8, Int16, Int32, Int64, Int8,
Decimal32, Decimal64, Decimal128, Int16,
Float32, Float64, Int32,
String> null_values; Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant< std::variant<
ContainerType<UInt8>, ContainerType<UInt16>, ContainerType<UInt32>, ContainerType<UInt64>, ContainerType<UInt8>,
ContainerType<UInt16>,
ContainerType<UInt32>,
ContainerType<UInt64>,
ContainerType<UInt128>, ContainerType<UInt128>,
ContainerType<Int8>, ContainerType<Int16>, ContainerType<Int32>, ContainerType<Int64>, ContainerType<Int8>,
ContainerType<Decimal32>, ContainerType<Decimal64>, ContainerType<Decimal128>, ContainerType<Int16>,
ContainerType<Float32>, ContainerType<Float64>, ContainerType<Int32>,
ContainerType<StringRef>> maps; ContainerType<Int64>,
ContainerType<Decimal32>,
ContainerType<Decimal64>,
ContainerType<Decimal128>,
ContainerType<Float32>,
ContainerType<Float64>,
ContainerType<StringRef>>
maps;
std::unique_ptr<Arena> string_arena; std::unique_ptr<Arena> string_arena;
}; };
@ -189,18 +218,12 @@ private:
template <typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsNumber( void
const Attribute & attribute, getItemsNumber(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl( void
const Attribute & attribute, getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename T> template <typename T>

View File

@ -1,21 +1,21 @@
#include "XDBCDictionarySource.h" #include "XDBCDictionarySource.h"
#include <common/logger_useful.h>
#include <common/LocalDateTime.h>
#include <Poco/Ext/SessionPoolHelpers.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Net/HTTPRequest.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <DataStreams/IProfilingBlockInputStream.h> #include <DataStreams/IProfilingBlockInputStream.h>
#include "readInvalidateQuery.h" #include <DataTypes/DataTypeString.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <Formats/FormatFactory.h> #include <Formats/FormatFactory.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Poco/Ext/SessionPoolHelpers.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/XDBCBridgeHelper.h> #include <Common/XDBCBridgeHelper.h>
#include <common/LocalDateTime.h>
#include <common/logger_useful.h>
#include "DictionarySourceFactory.h" #include "DictionarySourceFactory.h"
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "readInvalidateQuery.h"
#include <Common/config.h> #include <Common/config.h>
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC #if USE_POCO_SQLODBC || USE_POCO_DATAODBC
@ -24,7 +24,6 @@
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int SUPPORT_IS_DISABLED; extern const int SUPPORT_IS_DISABLED;
@ -35,32 +34,27 @@ namespace
class XDBCBridgeBlockInputStream : public IProfilingBlockInputStream class XDBCBridgeBlockInputStream : public IProfilingBlockInputStream
{ {
public: public:
XDBCBridgeBlockInputStream(const Poco::URI & uri, XDBCBridgeBlockInputStream(
const Poco::URI & uri,
std::function<void(std::ostream &)> callback, std::function<void(std::ostream &)> callback,
const Block & sample_block, const Block & sample_block,
const Context & context, const Context & context,
size_t max_block_size, size_t max_block_size,
const ConnectionTimeouts & timeouts, const String name) : name(name) const ConnectionTimeouts & timeouts,
const String name)
: name(name)
{ {
read_buf = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, callback, timeouts); read_buf = std::make_unique<ReadWriteBufferFromHTTP>(uri, Poco::Net::HTTPRequest::HTTP_POST, callback, timeouts);
reader = FormatFactory::instance().getInput(IXDBCBridgeHelper::DEFAULT_FORMAT, *read_buf, sample_block, context, max_block_size); reader
= FormatFactory::instance().getInput(IXDBCBridgeHelper::DEFAULT_FORMAT, *read_buf, sample_block, context, max_block_size);
} }
Block getHeader() const override Block getHeader() const override { return reader->getHeader(); }
{
return reader->getHeader();
}
String getName() const override String getName() const override { return name; }
{
return name;
}
private: private:
Block readImpl() override Block readImpl() override { return reader->read(); }
{
return reader->read();
}
String name; String name;
std::unique_ptr<ReadWriteBufferFromHTTP> read_buf; std::unique_ptr<ReadWriteBufferFromHTTP> read_buf;
@ -71,23 +65,27 @@ namespace
static const size_t max_block_size = 8192; static const size_t max_block_size = 8192;
XDBCDictionarySource::XDBCDictionarySource(const DictionaryStructure & dict_struct_, XDBCDictionarySource::XDBCDictionarySource(
const Poco::Util::AbstractConfiguration & config_, const std::string & config_prefix_, const DictionaryStructure & dict_struct_,
const Block & sample_block_, const Context & context_, const BridgeHelperPtr bridge_) const Poco::Util::AbstractConfiguration & config_,
: log(&Logger::get(bridge_->getName() + "DictionarySource")), const std::string & config_prefix_,
update_time{std::chrono::system_clock::from_time_t(0)}, const Block & sample_block_,
dict_struct{dict_struct_}, const Context & context_,
db{config_.getString(config_prefix_ + ".db", "")}, const BridgeHelperPtr bridge_)
table{config_.getString(config_prefix_ + ".table")}, : log(&Logger::get(bridge_->getName() + "DictionarySource"))
where{config_.getString(config_prefix_ + ".where", "")}, , update_time{std::chrono::system_clock::from_time_t(0)}
update_field{config_.getString(config_prefix_ + ".update_field", "")}, , dict_struct{dict_struct_}
sample_block{sample_block_}, , db{config_.getString(config_prefix_ + ".db", "")}
query_builder{dict_struct, db, table, where, bridge_->getIdentifierQuotingStyle()}, , table{config_.getString(config_prefix_ + ".table")}
load_all_query{query_builder.composeLoadAllQuery()}, , where{config_.getString(config_prefix_ + ".where", "")}
invalidate_query{config_.getString(config_prefix_ + ".invalidate_query", "")}, , update_field{config_.getString(config_prefix_ + ".update_field", "")}
bridge_helper{bridge_}, , sample_block{sample_block_}
timeouts{ConnectionTimeouts::getHTTPTimeouts(context_.getSettingsRef())}, , query_builder{dict_struct, db, table, where, bridge_->getIdentifierQuotingStyle()}
global_context(context_) , load_all_query{query_builder.composeLoadAllQuery()}
, invalidate_query{config_.getString(config_prefix_ + ".invalidate_query", "")}
, bridge_helper{bridge_}
, timeouts{ConnectionTimeouts::getHTTPTimeouts(context_.getSettingsRef())}
, global_context(context_)
{ {
bridge_url = bridge_helper->getMainURI(); bridge_url = bridge_helper->getMainURI();
@ -98,24 +96,23 @@ XDBCDictionarySource::XDBCDictionarySource(const DictionaryStructure & dict_stru
/// copy-constructor is provided in order to support cloneability /// copy-constructor is provided in order to support cloneability
XDBCDictionarySource::XDBCDictionarySource(const XDBCDictionarySource & other) XDBCDictionarySource::XDBCDictionarySource(const XDBCDictionarySource & other)
: log(&Logger::get(other.bridge_helper->getName() + "DictionarySource")), : log(&Logger::get(other.bridge_helper->getName() + "DictionarySource"))
update_time{other.update_time}, , update_time{other.update_time}
dict_struct{other.dict_struct}, , dict_struct{other.dict_struct}
db{other.db}, , db{other.db}
table{other.table}, , table{other.table}
where{other.where}, , where{other.where}
update_field{other.update_field}, , update_field{other.update_field}
sample_block{other.sample_block}, , sample_block{other.sample_block}
query_builder{dict_struct, db, table, where, other.bridge_helper->getIdentifierQuotingStyle()}, , query_builder{dict_struct, db, table, where, other.bridge_helper->getIdentifierQuotingStyle()}
load_all_query{other.load_all_query}, , load_all_query{other.load_all_query}
invalidate_query{other.invalidate_query}, , invalidate_query{other.invalidate_query}
invalidate_query_response{other.invalidate_query_response}, , invalidate_query_response{other.invalidate_query_response}
bridge_helper{other.bridge_helper}, , bridge_helper{other.bridge_helper}
bridge_url{other.bridge_url}, , bridge_url{other.bridge_url}
timeouts{other.timeouts}, , timeouts{other.timeouts}
global_context{other.global_context} , global_context{other.global_context}
{ {
} }
std::string XDBCDictionarySource::getUpdateFieldAndDate() std::string XDBCDictionarySource::getUpdateFieldAndDate()
@ -156,8 +153,7 @@ BlockInputStreamPtr XDBCDictionarySource::loadIds(const std::vector<UInt64> & id
return loadBase(query); return loadBase(query);
} }
BlockInputStreamPtr XDBCDictionarySource::loadKeys( BlockInputStreamPtr XDBCDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
const Columns & key_columns, const std::vector<size_t> & requested_rows)
{ {
const auto query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN); const auto query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN);
return loadBase(query); return loadBase(query);
@ -215,7 +211,8 @@ std::string XDBCDictionarySource::doInvalidateQuery(const std::string & request)
invalidate_sample_block, invalidate_sample_block,
global_context, global_context,
max_block_size, max_block_size,
timeouts, bridge_helper->getName() + "BlockInputStream"); timeouts,
bridge_helper->getName() + "BlockInputStream");
return readInvalidateQuery(stream); return readInvalidateQuery(stream);
} }
@ -223,12 +220,14 @@ std::string XDBCDictionarySource::doInvalidateQuery(const std::string & request)
BlockInputStreamPtr XDBCDictionarySource::loadBase(const std::string & query) const BlockInputStreamPtr XDBCDictionarySource::loadBase(const std::string & query) const
{ {
bridge_helper->startBridgeSync(); bridge_helper->startBridgeSync();
return std::make_shared<XDBCBridgeBlockInputStream>(bridge_url, return std::make_shared<XDBCBridgeBlockInputStream>(
bridge_url,
[query](std::ostream & os) { os << "query=" << query; }, [query](std::ostream & os) { os << "query=" << query; },
sample_block, sample_block,
global_context, global_context,
max_block_size, max_block_size,
timeouts, bridge_helper->getName() + "BlockInputStream"); timeouts,
bridge_helper->getName() + "BlockInputStream");
} }
void registerDictionarySourceXDBC(DictionarySourceFactory & factory) void registerDictionarySourceXDBC(DictionarySourceFactory & factory)
@ -243,7 +242,8 @@ void registerDictionarySourceXDBC(DictionarySourceFactory & factory)
Block & sample_block, Block & sample_block,
Context & context) -> DictionarySourcePtr { Context & context) -> DictionarySourcePtr {
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC #if USE_POCO_SQLODBC || USE_POCO_DATAODBC
BridgeHelperPtr bridge = std::make_shared<XDBCBridgeHelper<ODBCBridgeMixin>>(context, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string")); BridgeHelperPtr bridge = std::make_shared<XDBCBridgeHelper<ODBCBridgeMixin>>(
context, context.getSettings().http_receive_timeout, config.getString(config_prefix + ".odbc.connection_string"));
return std::make_unique<XDBCDictionarySource>(dict_struct, config, config_prefix + ".odbc", sample_block, context, bridge); return std::make_unique<XDBCDictionarySource>(dict_struct, config, config_prefix + ".odbc", sample_block, context, bridge);
#else #else
(void)dict_struct; (void)dict_struct;

View File

@ -1,12 +1,12 @@
#pragma once #pragma once
#include <IO/ConnectionTimeouts.h>
#include <Poco/Data/SessionPool.h> #include <Poco/Data/SessionPool.h>
#include <Poco/URI.h> #include <Poco/URI.h>
#include <Common/XDBCBridgeHelper.h>
#include "DictionaryStructure.h" #include "DictionaryStructure.h"
#include "ExternalQueryBuilder.h" #include "ExternalQueryBuilder.h"
#include "IDictionarySource.h" #include "IDictionarySource.h"
#include <IO/ConnectionTimeouts.h>
#include <Common/XDBCBridgeHelper.h>
namespace Poco namespace Poco
@ -26,7 +26,8 @@ namespace DB
class XDBCDictionarySource final : public IDictionarySource class XDBCDictionarySource final : public IDictionarySource
{ {
public: public:
XDBCDictionarySource(const DictionaryStructure & dict_struct_, XDBCDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config_, const Poco::Util::AbstractConfiguration & config_,
const std::string & config_prefix_, const std::string & config_prefix_,
const Block & sample_block_, const Block & sample_block_,

View File

@ -3,7 +3,6 @@
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int TOO_MANY_COLUMNS; extern const int TOO_MANY_COLUMNS;

View File

@ -5,7 +5,6 @@ class IProfilingBlockInputStream;
namespace DB namespace DB
{ {
// Using in MySQLDictionarySource and XDBCDictionarySource after processing invalidate_query // Using in MySQLDictionarySource and XDBCDictionarySource after processing invalidate_query
std::string readInvalidateQuery(IProfilingBlockInputStream & block_input_stream); std::string readInvalidateQuery(IProfilingBlockInputStream & block_input_stream);

Some files were not shown because too many files have changed in this diff Show More