2019-02-16 10:11:49 +00:00
|
|
|
#include "CassandraDictionarySource.h"
|
|
|
|
#include "DictionarySourceFactory.h"
|
|
|
|
#include "DictionaryStructure.h"
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-06-03 15:07:37 +00:00
|
|
|
/// Error codes used by the code below. They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int SUPPORT_IS_DISABLED;
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
|
|
|
/// Registers the creator for dictionary sources of type "cassandra" in `factory`.
///
/// When the build has USE_CASSANDRA enabled, the creator sets the driver log level to CASS_LOG_INFO and
/// builds a CassandraDictionarySource from the "<config_prefix>.cassandra" configuration subtree.
/// Otherwise the creator always throws SUPPORT_IS_DISABLED.
void registerDictionarySourceCassandra(DictionarySourceFactory & factory)
{
    /// The creator needs nothing from the enclosing scope, hence the empty capture list.
    auto create_cassandra_source = []([[maybe_unused]] const DictionaryStructure & dict_struct,
                                      [[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
                                      [[maybe_unused]] const std::string & config_prefix,
                                      [[maybe_unused]] Block & sample_block,
                                      const Context & /* context */,
                                      bool /*check_config*/) -> DictionarySourcePtr
    {
#if USE_CASSANDRA
        setupCassandraDriverLibraryLogging(CASS_LOG_INFO);

        const std::string source_prefix = config_prefix + ".cassandra";
        return std::make_unique<CassandraDictionarySource>(dict_struct, config, source_prefix, sample_block);
#else
        throw Exception{"Dictionary source of type `cassandra` is disabled because ClickHouse was built without cassandra support.",
                        ErrorCodes::SUPPORT_IS_DISABLED};
#endif
    };

    factory.registerSource("cassandra", create_cassandra_source);
}
|
2019-02-16 10:11:49 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#if USE_CASSANDRA
|
|
|
|
|
2020-05-27 20:13:25 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2020-06-03 11:54:26 +00:00
|
|
|
#include <Common/SipHash.h>
|
2020-05-27 20:13:25 +00:00
|
|
|
#include "CassandraBlockInputStream.h"
|
2020-06-03 15:07:37 +00:00
|
|
|
#include <common/logger_useful.h>
|
|
|
|
#include <DataStreams/UnionBlockInputStream.h>
|
2019-02-16 10:11:49 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2019-05-29 23:01:25 +00:00
|
|
|
/// Additional error codes needed only by the Cassandra-enabled part of the file.
/// They are declared here (extern) and defined elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int INVALID_CONFIG_PARAMETER;
}
|
|
|
|
|
|
|
|
/// Reads the Cassandra source settings from the configuration subtree rooted at `config_prefix`.
///
/// Keys read without a fallback value: "host", "keyspace" (stored in `db`), "column_family" (stored in `table`).
/// Keys with a fallback value:
///   "port"                  - 0 (the source only applies a non-zero port to the cluster)
///   "user", "password"      - empty string
///   "allow_filtering"       - false
///   "partition_key_prefix"  - 1
///   "max_threads"           - 8
///   "where"                 - empty string
///   "consistency"           - "One" (parsed by setConsistency, which throws on an unsupported name)
CassandraSettings::CassandraSettings(
    const Poco::Util::AbstractConfiguration & config,
    const String & config_prefix)
    : host(config.getString(config_prefix + ".host"))
    , port(config.getUInt(config_prefix + ".port", 0))
    , user(config.getString(config_prefix + ".user", ""))
    , password(config.getString(config_prefix + ".password", ""))
    , db(config.getString(config_prefix + ".keyspace"))
    , table(config.getString(config_prefix + ".column_family"))
    , allow_filtering(config.getBool(config_prefix + ".allow_filtering", false))
    , partition_key_prefix(config.getUInt(config_prefix + ".partition_key_prefix", 1))
    , max_threads(config.getUInt(config_prefix + ".max_threads", 8))
    , where(config.getString(config_prefix + ".where", ""))
{
    /// The consistency level arrives as text and has to be mapped to a driver constant.
    const String consistency_name = config.getString(config_prefix + ".consistency", "One");
    setConsistency(consistency_name);
}
|
|
|
|
|
|
|
|
void CassandraSettings::setConsistency(const String & config_str)
|
|
|
|
{
|
|
|
|
if (config_str == "One")
|
|
|
|
consistency = CASS_CONSISTENCY_ONE;
|
|
|
|
else if (config_str == "Two")
|
|
|
|
consistency = CASS_CONSISTENCY_TWO;
|
|
|
|
else if (config_str == "Three")
|
|
|
|
consistency = CASS_CONSISTENCY_THREE;
|
|
|
|
else if (config_str == "All")
|
|
|
|
consistency = CASS_CONSISTENCY_ALL;
|
|
|
|
else if (config_str == "EachQuorum")
|
|
|
|
consistency = CASS_CONSISTENCY_EACH_QUORUM;
|
|
|
|
else if (config_str == "Quorum")
|
|
|
|
consistency = CASS_CONSISTENCY_QUORUM;
|
|
|
|
else if (config_str == "LocalQuorum")
|
|
|
|
consistency = CASS_CONSISTENCY_LOCAL_QUORUM;
|
|
|
|
else if (config_str == "LocalOne")
|
|
|
|
consistency = CASS_CONSISTENCY_LOCAL_ONE;
|
|
|
|
else if (config_str == "Serial")
|
|
|
|
consistency = CASS_CONSISTENCY_SERIAL;
|
|
|
|
else if (config_str == "LocalSerial")
|
|
|
|
consistency = CASS_CONSISTENCY_LOCAL_SERIAL;
|
|
|
|
else /// CASS_CONSISTENCY_ANY is only valid for writes
|
|
|
|
throw Exception("Unsupported consistency level: " + config_str, ErrorCodes::INVALID_CONFIG_PARAMETER);
|
2019-02-16 10:11:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Block size passed to every CassandraBlockInputStream created in this file.
static constexpr size_t max_block_size = 8192;
|
|
|
|
|
|
|
|
/// Creates a source from already parsed settings.
///
/// Stores the dictionary structure, settings and sample block, prepares the query builder
/// (identifiers quoted with double quotes) and configures `cluster`: contact points, port (only when
/// it is non-zero), credentials and consistency level. No session is connected here; getSession() does that.
CassandraDictionarySource::CassandraDictionarySource(
    const DictionaryStructure & dict_struct_,
    const CassandraSettings & settings_,
    const Block & sample_block_)
    : log(&Poco::Logger::get("CassandraDictionarySource"))
    , dict_struct(dict_struct_)
    , settings(settings_)
    , sample_block(sample_block_)
    , query_builder(dict_struct, settings.db, "", settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes)
{
    cassandraCheck(cass_cluster_set_contact_points(cluster, settings.host.c_str()));

    /// A zero port means "not configured": leave whatever port the cluster object already has.
    if (settings.port)
    {
        cassandraCheck(cass_cluster_set_port(cluster, settings.port));
    }

    /// The result of setting credentials is not passed through cassandraCheck, unlike the other calls.
    cass_cluster_set_credentials(cluster, settings.user.c_str(), settings.password.c_str());

    cassandraCheck(cass_cluster_set_consistency(cluster, settings.consistency));
}
|
|
|
|
|
|
|
|
/// Creates a source directly from the server configuration: parses CassandraSettings from the subtree
/// at `config_prefix` and delegates everything else to the settings-based constructor.
CassandraDictionarySource::CassandraDictionarySource(
    const DictionaryStructure & dict_struct_,
    const Poco::Util::AbstractConfiguration & config,
    const String & config_prefix,
    Block & sample_block_)
    : CassandraDictionarySource(dict_struct_, CassandraSettings(config, config_prefix), sample_block_)
{
}
|
|
|
|
|
2020-06-03 15:07:37 +00:00
|
|
|
/// Appends " ALLOW FILTERING;" to `query` when the `allow_filtering` setting is enabled;
/// otherwise leaves `query` untouched.
///
/// The query is expected to end with a semicolon, which is replaced by the clause. If it does not
/// (or the string is empty), nothing is stripped: calling pop_back() on an empty string is undefined
/// behaviour, and removing a character that is not the terminator would corrupt the query text.
void CassandraDictionarySource::maybeAllowFiltering(String & query) const
{
    if (!settings.allow_filtering)
        return;

    /// Remove the terminating semicolon, if present.
    if (!query.empty() && query.back() == ';')
        query.pop_back();

    query += " ALLOW FILTERING;";
}
|
|
|
|
|
2020-05-26 19:21:18 +00:00
|
|
|
/// Returns a stream that reads the result of the "load all" query.
/// The query text comes from the query builder, optionally gets the ALLOW FILTERING clause,
/// is logged at INFO level and is then handed to a CassandraBlockInputStream together with a session.
BlockInputStreamPtr CassandraDictionarySource::loadAll()
{
    String cql = query_builder.composeLoadAllQuery();
    maybeAllowFiltering(cql);

    LOG_INFO(log, "Loading all using query: {}", cql);

    return std::make_shared<CassandraBlockInputStream>(getSession(), cql, sample_block, max_block_size);
}
|
|
|
|
|
2020-06-03 15:07:37 +00:00
|
|
|
std::string CassandraDictionarySource::toString() const
|
|
|
|
{
|
2020-06-03 11:54:26 +00:00
|
|
|
return "Cassandra: " + settings.db + '.' + settings.table;
|
2019-05-22 21:09:29 +00:00
|
|
|
}
|
2019-02-16 10:11:49 +00:00
|
|
|
|
2020-05-26 19:21:18 +00:00
|
|
|
/// Returns a stream that reads the rows for the given `ids`.
/// The query text comes from the query builder, optionally gets the ALLOW FILTERING clause,
/// is logged at INFO level and is then handed to a CassandraBlockInputStream together with a session.
BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
    String cql = query_builder.composeLoadIdsQuery(ids);
    maybeAllowFiltering(cql);

    LOG_INFO(log, "Loading ids using query: {}", cql);

    return std::make_shared<CassandraBlockInputStream>(getSession(), cql, sample_block, max_block_size);
}
|
|
|
|
|
|
|
|
/// Returns a stream that reads the rows of `key_columns` selected by `requested_rows` (complex keys).
///
/// Requested rows are grouped by a hash of their first `partition_key_prefix` key columns; one query
/// and one CassandraBlockInputStream is created per group. A single group is returned as is, several
/// groups are combined in a UnionBlockInputStream limited to `max_threads`.
///
/// Throws LOGICAL_ERROR when `requested_rows` is empty, and INVALID_CONFIG_PARAMETER when
/// `partition_key_prefix` is larger than the number of key columns.
BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
    if (requested_rows.empty())
        throw Exception("No rows requested", ErrorCodes::LOGICAL_ERROR);

    /// The loop below indexes key_columns[0 .. partition_key_prefix), so the prefix must not exceed
    /// the number of key columns; otherwise the access would be out of bounds.
    if (settings.partition_key_prefix > key_columns.size())
        throw Exception(
            "Cassandra partition_key_prefix (" + std::to_string(settings.partition_key_prefix)
                + ") is greater than the number of key columns (" + std::to_string(key_columns.size()) + ")",
            ErrorCodes::INVALID_CONFIG_PARAMETER);

    /// TODO is there a better way to load data by complex keys?
    /// Group rows by the hash of their partition key prefix.
    std::unordered_map<UInt64, std::vector<size_t>> partitions;
    for (const auto & row : requested_rows)
    {
        SipHash partition_key;
        for (size_t i = 0; i < settings.partition_key_prefix; ++i)
            key_columns[i]->updateHashWithValue(row, partition_key);
        partitions[partition_key.get64()].push_back(row);
    }

    /// One query (and one stream) per group.
    BlockInputStreams streams;
    for (const auto & partition : partitions)
    {
        String query = query_builder.composeLoadKeysQuery(
            key_columns, partition.second, ExternalQueryBuilder::CASSANDRA_SEPARATE_PARTITION_KEY, settings.partition_key_prefix);
        maybeAllowFiltering(query);
        LOG_INFO(log, "Loading keys for partition hash {} using query: {}", partition.first, query);
        streams.push_back(std::make_shared<CassandraBlockInputStream>(getSession(), query, sample_block, max_block_size));
    }

    /// No need for a union when there is only one stream.
    if (streams.size() == 1)
        return streams.front();

    return std::make_shared<UnionBlockInputStream>(streams, nullptr, settings.max_threads);
}
|
2019-02-16 10:11:49 +00:00
|
|
|
|
2020-06-03 15:07:37 +00:00
|
|
|
/// Incremental loading is not available for this source: always throws NOT_IMPLEMENTED.
BlockInputStreamPtr CassandraDictionarySource::loadUpdatedAll()
{
    throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource",
                    ErrorCodes::NOT_IMPLEMENTED};
}
|
|
|
|
|
|
|
|
/// Returns a session connected to `cluster`, reusing the one remembered in `maybe_session`
/// when it can still be locked, and connecting a new one otherwise.
///
/// The whole lookup is done under `connect_mutex`. `maybe_session` is assigned under that mutex, so
/// reading it (via lock()) without the mutex would be an unsynchronised read concurrent with a write
/// to the same object. Holding the mutex also ensures only one caller at a time connects.
///
/// Errors from connecting are reported by cassandraWaitAndCheck; in that case `maybe_session`
/// is left unchanged.
CassSessionShared CassandraDictionarySource::getSession()
{
    std::lock_guard lock(connect_mutex);

    /// Reuse connection if exists, create new one if not
    CassSessionShared session = maybe_session.lock();
    if (session)
        return session;

    session = std::make_shared<CassSessionPtr>();
    CassFuturePtr future = cass_session_connect(*session, cluster);
    cassandraWaitAndCheck(future);

    /// Remember the session only after the connection has been checked.
    maybe_session = session;
    return session;
}
|
|
|
|
|
2019-02-16 10:11:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|