ClickHouse/src/Dictionaries/MySQLDictionarySource.cpp

303 lines
10 KiB
C++
Raw Normal View History

#include "MySQLDictionarySource.h"
#if USE_MYSQL
# include <mysqlxx/PoolFactory.h>
#endif
#include <Poco/Util/AbstractConfiguration.h>
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
2019-12-15 06:34:43 +00:00
#include "registerDictionaries.h"
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Processors/Pipe.h>
namespace DB
{
/// Number of tries used for the MySQL stream settings unless the
/// `fail_on_connection_loss` option is enabled (in which case 1 is used instead).
/// Only referenced under USE_MYSQL, hence the [[maybe_unused]] attribute.
[[maybe_unused]] static constexpr size_t default_num_tries_on_connection_loss = 3;
/// Error codes referenced by this file; only declared here, the definitions live elsewhere.
namespace ErrorCodes
{
/// Used when the `mysql` source is requested but the build has no MySQL support.
extern const int SUPPORT_IS_DISABLED;
/// Used when the source configuration contains neither `table` nor `query`.
extern const int UNSUPPORTED_METHOD;
}
/// Registers the dictionary source type "mysql" in the given factory.
///
/// The registered callback reads its options from `<config_prefix>.mysql.*` and
/// returns a MySQLDictionarySource. It throws UNSUPPORTED_METHOD when neither
/// `table` nor `query` is configured. When the build has no MySQL support the
/// callback unconditionally throws SUPPORT_IS_DISABLED.
void registerDictionarySourceMysql(DictionarySourceFactory & factory)
{
    auto create_table_source = []([[maybe_unused]] const DictionaryStructure & dict_struct,
                                  [[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
                                  [[maybe_unused]] const std::string & config_prefix,
                                  [[maybe_unused]] Block & sample_block,
                                  [[maybe_unused]] ContextPtr global_context,
                                  const std::string & /* default_database */,
                                  bool /* created_from_ddl */) -> DictionarySourcePtr
    {
#if USE_MYSQL
        auto settings_config_prefix = config_prefix + ".mysql";

        /// Either option turns the flag on; the second one is only read when the first is false.
        /// NOTE(review): presumably this is the `auto_close` stream setting - confirm against StreamSettings.
        const bool close_or_share_connection
            = config.getBool(settings_config_prefix + ".close_connection", false)
            || config.getBool(settings_config_prefix + ".share_connection", false);

        /// A single try when the user asked to fail on connection loss, the default otherwise.
        const size_t num_tries = config.getBool(settings_config_prefix + ".fail_on_connection_loss", false)
            ? 1
            : default_num_tries_on_connection_loss;

        StreamSettings mysql_input_stream_settings(
            global_context->getSettingsRef(),
            close_or_share_connection,
            false,
            num_tries);

        auto table = config.getString(settings_config_prefix + ".table", "");
        auto where = config.getString(settings_config_prefix + ".where", "");
        auto query = config.getString(settings_config_prefix + ".query", "");

        /// At least one of `table` / `query` has to be present.
        const bool nothing_to_select_from = table.empty() && query.empty();
        if (nothing_to_select_from)
            throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL dictionary source configuration must contain table or query field");

        MySQLDictionarySource::Configuration configuration
        {
            .db = config.getString(settings_config_prefix + ".db", ""),
            .table = table,
            .query = query,
            .where = where,
            .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""),
            .update_field = config.getString(settings_config_prefix + ".update_field", ""),
            .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1),
            .dont_check_update_time = config.getBool(settings_config_prefix + ".dont_check_update_time", false)
        };

        auto pool = std::make_shared<mysqlxx::PoolWithFailover>(
            mysqlxx::PoolFactory::instance().get(config, settings_config_prefix));

        return std::make_unique<MySQLDictionarySource>(
            dict_struct, configuration, std::move(pool), sample_block, mysql_input_stream_settings);
#else
        throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
            "Dictionary source of type `mysql` is disabled because ClickHouse was built without mysql support.");
#endif
    };

    factory.registerSource("mysql", create_table_source);
}
}
#if USE_MYSQL
# include <Columns/ColumnString.h>
# include <DataTypes/DataTypeString.h>
# include <IO/WriteBufferFromString.h>
# include <IO/WriteHelpers.h>
# include <common/LocalDateTime.h>
# include <common/logger_useful.h>
# include "readInvalidateQuery.h"
# include <mysqlxx/Exception.h>
# include <Core/Settings.h>
2018-06-05 19:46:49 +00:00
namespace DB
{
MySQLDictionarySource::MySQLDictionarySource(
const DictionaryStructure & dict_struct_,
const Configuration & configuration_,
mysqlxx::PoolWithFailoverPtr pool_,
const Block & sample_block_,
const StreamSettings & settings_)
2020-05-30 21:57:37 +00:00
: log(&Poco::Logger::get("MySQLDictionarySource"))
, update_time(std::chrono::system_clock::from_time_t(0))
, dict_struct(dict_struct_)
, configuration(configuration_)
, pool(std::move(pool_))
, sample_block(sample_block_)
, query_builder(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks)
2021-08-09 11:23:44 +00:00
, load_all_query(query_builder.composeLoadAllQuery())
, settings(settings_)
{
}
/// Copy constructor; clone() relies on it.
/// The pool pointer is copied (so the copy uses the same pool object), while the
/// query builder is constructed anew from this object's own copied members
/// instead of being copied from `other`.
MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other)
    : log{&Poco::Logger::get("MySQLDictionarySource")}
    , update_time{other.update_time}
    , dict_struct(other.dict_struct)
    , configuration(other.configuration)
    , pool{other.pool}
    , sample_block(other.sample_block)
    , query_builder(dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks)
    , load_all_query(other.load_all_query)
    , last_modification(other.last_modification)
    , invalidate_query_response(other.invalidate_query_response)
    , settings(other.settings)
{
}
std::string MySQLDictionarySource::getUpdateFieldAndDate()
{
if (update_time != std::chrono::system_clock::from_time_t(0))
{
time_t hr_time = std::chrono::system_clock::to_time_t(update_time) - configuration.update_lag;
2021-03-15 19:23:27 +00:00
std::string str_time = DateLUT::instance().timeToString(hr_time);
update_time = std::chrono::system_clock::now();
return query_builder.composeUpdateQuery(configuration.update_field, str_time);
}
else
{
update_time = std::chrono::system_clock::now();
return load_all_query;
}
}
/// Wraps a MySQLWithFailoverSource for the given query text into a Pipe.
/// The source is given this object's pool, sample block and stream settings.
Pipe MySQLDictionarySource::loadFromQuery(const String & query)
{
    auto failover_source = std::make_shared<MySQLWithFailoverSource>(pool, query, sample_block, settings);
    return Pipe(std::move(failover_source));
}
/// Loads the whole dictionary with the precomposed "load all" query.
/// Before that, `last_modification` is refreshed; isModified() compares against it.
Pipe MySQLDictionarySource::loadAll()
{
    {
        /// The pool entry is only needed for the modification-time lookup.
        auto entry = pool->get();
        constexpr bool allow_connection_closure = false;
        last_modification = getLastModification(entry, allow_connection_closure);

        LOG_TRACE(log, load_all_query);
        return loadFromQuery(load_all_query);
    }
}
/// Loads the dictionary using the query returned by getUpdateFieldAndDate().
/// As in loadAll(), `last_modification` is refreshed first.
Pipe MySQLDictionarySource::loadUpdatedAll()
{
    {
        auto entry = pool->get();
        constexpr bool allow_connection_closure = false;
        last_modification = getLastModification(entry, allow_connection_closure);

        /// Note: this call also moves `update_time` forward to the current time.
        const std::string update_query_text = getUpdateFieldAndDate();

        LOG_TRACE(log, update_query_text);
        return loadFromQuery(update_query_text);
    }
}
/// Loads only the rows for the given ids.
/// Deliberately neither logs the query nor touches the modification time:
/// the request can be large and this method is called often.
Pipe MySQLDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
    return loadFromQuery(query_builder.composeLoadIdsQuery(ids));
}
/// Loads only the rows for the requested keys; the query is composed with the AND_OR_CHAIN method.
/// Deliberately neither logs the query nor touches the modification time:
/// the request can be large and this method is called often.
Pipe MySQLDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
    return loadFromQuery(
        query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN));
}
bool MySQLDictionarySource::isModified() const
{
if (!configuration.invalidate_query.empty())
{
auto response = doInvalidateQuery(configuration.invalidate_query);
if (response == invalidate_query_response) //-V1051
return false;
invalidate_query_response = response;
return true;
}
if (configuration.dont_check_update_time)
return true;
2021-03-27 14:35:44 +00:00
auto connection = pool->get();
return getLastModification(connection, true) > last_modification;
}
/// Always true for this source (loadIds() and loadKeys() are implemented).
bool MySQLDictionarySource::supportsSelectiveLoad() const
{
    constexpr bool selective_load_supported = true;
    return selective_load_supported;
}
/// True when a non-empty `update_field` is configured.
bool MySQLDictionarySource::hasUpdateField() const
{
    const bool update_field_missing = configuration.update_field.empty();
    return !update_field_missing;
}
/// Returns a new source created with the copy constructor.
DictionarySourcePtr MySQLDictionarySource::clone() const
{
    DictionarySourcePtr copy = std::make_unique<MySQLDictionarySource>(*this);
    return copy;
}
/// Short description of the form "MySQL: <db>.<table>", followed by
/// ", where: <where>" when a where clause is configured.
std::string MySQLDictionarySource::toString() const
{
    std::string description = "MySQL: ";
    description += configuration.db;
    description += '.';
    description += configuration.table;

    if (!configuration.where.empty())
    {
        description += ", where: ";
        description += configuration.where;
    }

    return description;
}
std::string MySQLDictionarySource::quoteForLike(const std::string & value)
{
std::string tmp;
tmp.reserve(value.size());
for (auto c : value)
{
if (c == '%' || c == '_' || c == '\\')
tmp.push_back('\\');
tmp.push_back(c);
}
2017-07-31 21:39:24 +00:00
WriteBufferFromOwnString out;
writeQuoted(tmp, out);
return out.str();
}
/// Asks MySQL (SHOW TABLE STATUS) when the configured table was last updated.
///
/// @param connection                pool entry used to run the query
/// @param allow_connection_closure  when true and `settings.auto_close` is set,
///                                  the entry is disconnected after the result was read
/// @return the reported update time; the current time if `dont_check_update_time`
///         is set, the value is NULL, no row came back, or the query threw
///         (exceptions are logged and not propagated).
LocalDateTime MySQLDictionarySource::getLastModification(mysqlxx::Pool::Entry & connection, bool allow_connection_closure) const
{
    /// Failing to obtain the modification time is not treated as an error: the current time is the fallback.
    LocalDateTime modification_time{std::time(nullptr)};

    if (configuration.dont_check_update_time)
        return modification_time;

    try
    {
        auto query = connection->query("SHOW TABLE STATUS LIKE " + quoteForLike(configuration.table));
        LOG_TRACE(log, query.str());

        auto result = query.use();
        size_t fetched_rows = 0;

        if (auto first_row = result.fetch())
        {
            fetched_rows = 1;

            /// Position of the update time column in the SHOW TABLE STATUS output.
            constexpr auto update_time_column = 12;
            const auto & update_time_cell = first_row[update_time_column];

            if (!update_time_cell.isNull())
            {
                modification_time = update_time_cell.getDateTime();
                LOG_TRACE(log, "Got modification time: {}", update_time_cell.getString());
            }

            /// Drain the rest of the result, otherwise MySQL reports "commands out of sync".
            while (result.fetch())
                ++fetched_rows;
        }

        const bool close_connection = settings.auto_close && allow_connection_closure;
        if (close_connection)
            connection.disconnect();

        /// Exactly one row is expected; anything else is reported but tolerated.
        if (fetched_rows == 0)
        {
            LOG_ERROR(log, "Cannot find table in SHOW TABLE STATUS result.");
        }
        else if (fetched_rows > 1)
        {
            LOG_ERROR(log, "Found more than one table in SHOW TABLE STATUS result.");
        }
    }
    catch (...)
    {
        tryLogCurrentException("MySQLDictionarySource");
    }

    return modification_time;
}
std::string MySQLDictionarySource::doInvalidateQuery(const std::string & request) const
{
Block invalidate_sample_block;
ColumnPtr column(ColumnString::create());
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
return readInvalidateQuery(Pipe(std::make_unique<MySQLSource>(pool->get(), request, invalidate_sample_block, settings)));
}
}
#endif