refine table source for regexp tree dictionary

This commit is contained in:
Han Fei 2023-05-09 20:17:54 +02:00
parent 71c96ceb61
commit ddce47f79e
5 changed files with 47 additions and 13 deletions

View File

@ -71,11 +71,11 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
: update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, configuration{configuration_}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, query_builder(std::make_shared<ExternalQueryBuilder>(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks))
, sample_block{sample_block_}
, context(context_)
, pool{createPool(configuration)}
, load_all_query{query_builder.composeLoadAllQuery()}
, load_all_query{query_builder->composeLoadAllQuery()}
{
}
@ -84,7 +84,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar
, dict_struct{other.dict_struct}
, configuration{other.configuration}
, invalidate_query_response{other.invalidate_query_response}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, query_builder(std::make_shared<ExternalQueryBuilder>(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks))
, sample_block{other.sample_block}
, context(Context::createCopy(other.context))
, pool{createPool(configuration)}
@ -99,12 +99,12 @@ std::string ClickHouseDictionarySource::getUpdateFieldAndDate()
time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag;
std::string str_time = DateLUT::instance().timeToString(hr_time);
update_time = std::chrono::system_clock::now();
return query_builder.composeUpdateQuery(configuration.update_field, str_time);
return query_builder->composeUpdateQuery(configuration.update_field, str_time);
}
else
{
update_time = std::chrono::system_clock::now();
return query_builder.composeLoadAllQuery();
return query_builder->composeLoadAllQuery();
}
}
@ -121,13 +121,13 @@ QueryPipeline ClickHouseDictionarySource::loadUpdatedAll()
/// Loads the rows for the given ids: asks query_builder to compose the
/// load-by-ids query and returns the pipeline produced by createStreamForQuery.
QueryPipeline ClickHouseDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
return createStreamForQuery(query_builder.composeLoadIdsQuery(ids));
return createStreamForQuery(query_builder->composeLoadIdsQuery(ids));
}
/// Loads the rows for the requested rows of key_columns: composes the
/// load-by-keys query with the IN_WITH_TUPLES method and returns the pipeline
/// produced by createStreamForQuery for that query text.
QueryPipeline ClickHouseDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
String query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES);
String query = query_builder->composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES);
return createStreamForQuery(query);
}

View File

@ -78,11 +78,11 @@ private:
const DictionaryStructure dict_struct;
const Configuration configuration;
mutable std::string invalidate_query_response;
ExternalQueryBuilder query_builder;
ExternalQueryBuilderPtr query_builder;
Block sample_block;
ContextMutablePtr context;
ConnectionPoolWithFailoverPtr pool;
const std::string load_all_query;
std::string load_all_query;
Poco::Logger * log = &Poco::Logger::get("ClickHouseDictionarySource");
/// RegExpTreeDictionary is the only dictionary whose structure of attributes differs from the input block.

View File

@ -36,6 +36,10 @@ struct ExternalQueryBuilder
const std::string & where_,
IdentifierQuotingStyle quoting_style_);
ExternalQueryBuilder(const ExternalQueryBuilder &) = default;
virtual ~ExternalQueryBuilder() = default;
/** Generate a query to load all data. */
std::string composeLoadAllQuery() const;
@ -61,10 +65,10 @@ struct ExternalQueryBuilder
std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector<size_t> & requested_rows, LoadKeysMethod method, size_t partition_key_prefix = 0) const;
private:
protected:
const FormatSettings format_settings = {};
void composeLoadAllQuery(WriteBuffer & out) const;
virtual void composeLoadAllQuery(WriteBuffer & out) const;
/// In the following methods `beg` and `end` specify which columns to write in the expression
@ -93,4 +97,6 @@ private:
void writeQuoted(const std::string & s, WriteBuffer & out) const;
};
using ExternalQueryBuilderPtr = std::shared_ptr<ExternalQueryBuilder>;
}

View File

@ -20,6 +20,7 @@
#include <Functions/Regexps.h>
#include <Functions/checkHyperscanRegexp.h>
#include <QueryPipeline/QueryPipeline.h>
#include <Processors/Sources/BlocksListSource.h>
#include <Dictionaries/ClickHouseDictionarySource.h>
#include <Dictionaries/DictionaryFactory.h>
@ -31,7 +32,6 @@
#include <re2_st/stringpiece.h>
#include "Processors/Sources/BlocksListSource.h"
#include "config.h"
#if USE_VECTORSCAN
@ -87,6 +87,32 @@ namespace
}
}
struct ExternalRegexpQueryBuilder final : public ExternalQueryBuilder
{
explicit ExternalRegexpQueryBuilder(const ExternalQueryBuilder & builder) : ExternalQueryBuilder(builder) {}
void composeLoadAllQuery(WriteBuffer & out) const override
{
writeString("SELECT id, parent_id, regexp, keys, values FROM ", out);
if (!db.empty())
{
writeQuoted(db, out);
writeChar('.', out);
}
if (!schema.empty())
{
writeQuoted(schema, out);
writeChar('.', out);
}
writeQuoted(table, out);
if (!where.empty())
{
writeString(" WHERE ", out);
writeString(where, out);
}
}
};
struct RegExpTreeDictionary::RegexTreeNode
{
std::vector<UInt64> children;
@ -385,6 +411,8 @@ RegExpTreeDictionary::RegExpTreeDictionary(
sample_block.insert(ColumnWithTypeAndName(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), kKeys));
sample_block.insert(ColumnWithTypeAndName(std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), kValues));
ch_source->sample_block = std::move(sample_block);
ch_source->query_builder = std::make_shared<ExternalRegexpQueryBuilder>(*ch_source->query_builder);
ch_source->load_all_query = ch_source->query_builder->composeLoadAllQuery();
}
loadData();

View File

@ -29,7 +29,7 @@ create dictionary regexp_dict1
comment String default 'nothing'
)
PRIMARY KEY(regexp)
SOURCE(CLICKHOUSE(QUERY concat('select * from ', currentDatabase() , '.regexp_dictionary_source_table')))
SOURCE(CLICKHOUSE(TABLE 'regexp_dictionary_source_table'))
LIFETIME(0)
LAYOUT(regexp_tree);