// ClickHouse/src/Dictionaries/HTTPDictionarySource.cpp

#include "HTTPDictionarySource.h"
2021-10-15 20:18:20 +00:00
#include <Formats/formatBlock.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Processors/Formats/IInputFormat.h>
#include <Interpreters/Context.h>
#include <Storages/ExternalDataSourceConfiguration.h>
#include <Poco/Net/HTTPRequest.h>
2022-04-27 15:05:45 +00:00
#include <Common/logger_useful.h>
#include "DictionarySourceFactory.h"
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h"
2019-12-15 06:34:43 +00:00
#include "registerDictionaries.h"
2016-11-15 19:51:06 +00:00
namespace DB
{
2020-02-25 18:10:48 +00:00
/// Error codes referenced in this file. They are only declared here (`extern`);
/// the definitions live outside this translation unit.
namespace ErrorCodes
{
/// Thrown by the `http` source factory when the dictionary structure has attribute expressions.
extern const int LOGICAL_ERROR;
}
2019-02-10 16:55:12 +00:00
/// Block size passed to the input format when parsing the HTTP response (see createWrappedBuffer()).
static constexpr UInt64 max_block_size = 8192;
2016-12-08 02:49:04 +00:00
/// Creates an HTTP dictionary source.
///
/// @param dict_struct_    dictionary structure, kept in `dict_struct`.
/// @param configuration_  source configuration (URL, format, update field/lag, headers), kept in `configuration`.
/// @param credentials_    basic-auth credentials; username and password are copied into the `credentials` member.
/// @param sample_block_   block describing the expected columns, kept in `sample_block`.
/// @param context_        context used for settings, formats and read settings.
HTTPDictionarySource::HTTPDictionarySource(
    const DictionaryStructure & dict_struct_,
    const Configuration & configuration_,
    const Poco::Net::HTTPBasicCredentials & credentials_,
    Block & sample_block_,
    ContextPtr context_)
    : log(getLogger("HTTPDictionarySource"))
    /// time_t(0) is the value getUpdateFieldAndDate() treats as "no previous load".
    , update_time(std::chrono::system_clock::from_time_t(0))
    , dict_struct(dict_struct_)
    , configuration(configuration_)
    , sample_block(sample_block_)
    , context(context_)
    /// Reads the `context` member, so `context` must be initialized before `timeouts`.
    , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout))
{
    /// The credentials member is filled field by field from the supplied object.
    const auto & login = credentials_.getUsername();
    const auto & secret = credentials_.getPassword();
    credentials.setUsername(login);
    credentials.setPassword(secret);
}
/// Copy constructor (used by clone()).
/// The last update time, structure, configuration and sample block are taken from `other`;
/// the context is a new copy made with Context::createCopy(), and the timeouts are
/// recomputed from that copy rather than taken from `other`.
HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other)
    : log(getLogger("HTTPDictionarySource"))
    , update_time(other.update_time)
    , dict_struct(other.dict_struct)
    , configuration(other.configuration)
    , sample_block(other.sample_block)
    , context(Context::createCopy(other.context))
    /// Reads the `context` member, so `context` must be initialized before `timeouts`.
    , timeouts(ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout))
{
    /// The credentials member is filled field by field from the other source.
    const auto & login = other.credentials.getUsername();
    const auto & secret = other.credentials.getPassword();
    credentials.setUsername(login);
    credentials.setPassword(secret);
}
2022-05-20 19:49:31 +00:00
/// Turns an HTTP response buffer into a pipeline that parses it with the configured input format.
/// The decompression method is picked by chooseCompressionMethod() from the path of the
/// configured URL and the compression method reported by the HTTP buffer.
///
/// @param http_buffer_ptr  HTTP buffer to read from; ownership is taken.
/// @return pipeline built from the input format reading the (possibly decompressing) buffer.
QueryPipeline HTTPDictionarySource::createWrappedBuffer(std::unique_ptr<ReadWriteBufferFromHTTP> http_buffer_ptr)
{
    Poco::URI source_uri(configuration.url);

    /// Must be queried before the buffer is moved away below.
    String reported_compression = http_buffer_ptr->getCompressionMethod();
    auto compression = chooseCompressionMethod(source_uri.getPath(), reported_compression);

    auto decompressed_buffer = wrapReadBufferWithCompressionMethod(std::move(http_buffer_ptr), compression);

    auto format_source = context->getInputFormat(configuration.format, *decompressed_buffer, sample_block, max_block_size);
    /// The format was created over a reference to the buffer; hand the buffer itself over to it.
    format_source->addBuffer(std::move(decompressed_buffer));

    return QueryPipeline(std::move(format_source));
}
/// Refreshes `update_time` to the current time and, if there was a previous update,
/// appends `<update_field>=<previous update time minus update_lag>` to the query string of `uri`.
///
/// @param uri  request URI that receives the extra query parameter.
void HTTPDictionarySource::getUpdateFieldAndDate(Poco::URI & uri)
{
    const auto never_loaded = std::chrono::system_clock::from_time_t(0);

    /// Remember the old timestamp, then record "now" regardless of which branch follows.
    const auto previous_update = update_time;
    update_time = std::chrono::system_clock::now();

    /// First call: nothing to send, the timestamp has just been recorded.
    if (previous_update == never_loaded)
        return;

    const time_t since = std::chrono::system_clock::to_time_t(previous_update) - configuration.update_lag;

    WriteBufferFromOwnString rendered;
    writeDateTimeText(since, rendered);
    uri.addQueryParameter(configuration.update_field, rendered.str());
}
2022-05-20 19:49:31 +00:00
/// Requests the configured URL and returns a pipeline that parses the response.
/// No request method or body callback is set on the builder here.
QueryPipeline HTTPDictionarySource::loadAll()
{
    LOG_TRACE(log, "loadAll {}", toString());

    Poco::URI request_uri(configuration.url);

    /// withDelayInit(false) is passed so the builder does not delay buffer initialization.
    auto http_buffer = BuilderRWBufferFromHTTP(request_uri)
                           .withConnectionGroup(HTTPConnectionGroupType::STORAGE)
                           .withSettings(context->getReadSettings())
                           .withTimeouts(timeouts)
                           .withHeaders(configuration.header_entries)
                           .withDelayInit(false)
                           .create(credentials);

    return createWrappedBuffer(std::move(http_buffer));
}
2022-05-20 19:49:31 +00:00
/// Same request as loadAll(), except that the URI first goes through getUpdateFieldAndDate(),
/// which may append the update-field query parameter and always refreshes `update_time`.
QueryPipeline HTTPDictionarySource::loadUpdatedAll()
{
    Poco::URI request_uri(configuration.url);
    getUpdateFieldAndDate(request_uri);

    /// Logged after the update parameter has been added, so the trace shows the final URI.
    LOG_TRACE(log, "loadUpdatedAll {}", request_uri.toString());

    auto http_buffer = BuilderRWBufferFromHTTP(request_uri)
                           .withConnectionGroup(HTTPConnectionGroupType::STORAGE)
                           .withSettings(context->getReadSettings())
                           .withTimeouts(timeouts)
                           .withHeaders(configuration.header_entries)
                           .withDelayInit(false)
                           .create(credentials);

    return createWrappedBuffer(std::move(http_buffer));
}
2022-05-20 19:49:31 +00:00
/// Loads the rows for the given ids: sends them as the body of a POST request,
/// serialized in the configured format, and returns a pipeline over the response.
///
/// @param ids  identifiers to request.
QueryPipeline HTTPDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
    LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());

    auto ids_block = blockForIds(dict_struct, ids);

    /// Writes the request body. The block is captured by value; `this` is captured for context/configuration.
    ReadWriteBufferFromHTTP::OutStreamCallback write_request_body = [ids_block, this](std::ostream & request_stream)
    {
        WriteBufferFromOStream body_buffer(request_stream);
        auto formatter = context->getOutputFormatParallelIfPossible(configuration.format, body_buffer, ids_block.cloneEmpty());
        formatBlock(formatter, ids_block);
        body_buffer.finalize();
    };

    Poco::URI request_uri(configuration.url);

    auto http_buffer = BuilderRWBufferFromHTTP(request_uri)
                           .withConnectionGroup(HTTPConnectionGroupType::STORAGE)
                           .withMethod(Poco::Net::HTTPRequest::HTTP_POST)
                           .withSettings(context->getReadSettings())
                           .withTimeouts(timeouts)
                           .withHeaders(configuration.header_entries)
                           .withOutCallback(std::move(write_request_body))
                           .withDelayInit(false)
                           .create(credentials);

    return createWrappedBuffer(std::move(http_buffer));
}
2022-05-20 19:49:31 +00:00
/// Loads the rows for the requested keys: sends them as the body of a POST request,
/// serialized in the configured format, and returns a pipeline over the response.
///
/// @param key_columns     columns holding the key values.
/// @param requested_rows  row numbers within `key_columns` to request.
QueryPipeline HTTPDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
    LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size());

    auto keys_block = blockForKeys(dict_struct, key_columns, requested_rows);

    /// Writes the request body. The block is captured by value; `this` is captured for context/configuration.
    ReadWriteBufferFromHTTP::OutStreamCallback write_request_body = [keys_block, this](std::ostream & request_stream)
    {
        WriteBufferFromOStream body_buffer(request_stream);
        auto formatter = context->getOutputFormatParallelIfPossible(configuration.format, body_buffer, keys_block.cloneEmpty());
        formatBlock(formatter, keys_block);
        body_buffer.finalize();
    };

    Poco::URI request_uri(configuration.url);

    auto http_buffer = BuilderRWBufferFromHTTP(request_uri)
                           .withConnectionGroup(HTTPConnectionGroupType::STORAGE)
                           .withMethod(Poco::Net::HTTPRequest::HTTP_POST)
                           .withSettings(context->getReadSettings())
                           .withTimeouts(timeouts)
                           .withHeaders(configuration.header_entries)
                           .withOutCallback(std::move(write_request_body))
                           .withDelayInit(false)
                           .create(credentials);

    return createWrappedBuffer(std::move(http_buffer));
}
/// Modification check for this source; this implementation unconditionally answers `true`.
bool HTTPDictionarySource::isModified() const
{
    return true;
}
/// Selective-load capability flag; always `true` for this source (see loadIds() and loadKeys()).
bool HTTPDictionarySource::supportsSelectiveLoad() const
{
    return true;
}
/// Returns `true` when a non-empty `update_field` is configured.
bool HTTPDictionarySource::hasUpdateField() const
{
    const auto & field_name = configuration.update_field;
    return !field_name.empty();
}
2016-11-15 19:51:06 +00:00
/// Creates a new source through the copy constructor and returns it as a shared pointer.
DictionarySourcePtr HTTPDictionarySource::clone() const
{
    return std::make_shared<HTTPDictionarySource>(*this);
}
std::string HTTPDictionarySource::toString() const
{
Poco::URI uri(configuration.url);
2016-11-24 01:01:11 +00:00
return uri.toString();
2016-11-15 19:51:06 +00:00
}
/// Registers the creator for dictionary sources of type "http" in the given factory.
///
/// The creator reads URL, endpoint, format, credentials and headers either from the result of
/// getURLBasedDataSourceConfiguration() (only attempted for dictionaries created from DDL)
/// or from the `<config_prefix>.http` section of the configuration, joins URL and endpoint
/// with exactly one '/', and builds an HTTPDictionarySource.
/// Throws LOGICAL_ERROR if the dictionary structure has attribute expressions.
void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
{
    auto create_table_source = [](const DictionaryStructure & dict_struct,
                                  const Poco::Util::AbstractConfiguration & config,
                                  const std::string & config_prefix,
                                  Block & sample_block,
                                  ContextPtr global_context,
                                  const std::string & /* default_database */,
                                  bool created_from_ddl) -> DictionarySourcePtr
    {
        if (dict_struct.has_expressions)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Dictionary source of type `http` does not support attribute expressions");

        const auto http_prefix = config_prefix + ".http";

        Poco::Net::HTTPBasicCredentials credentials;
        HTTPHeaderEntries header_entries;
        String url;
        String endpoint;
        String format;

        auto named_collection = created_from_ddl
            ? getURLBasedDataSourceConfiguration(config, http_prefix, global_context)
            : std::nullopt;

        if (named_collection)
        {
            const auto & collection = named_collection->configuration;

            url = collection.url;
            endpoint = collection.endpoint;
            format = collection.format;

            credentials.setUsername(collection.user);
            credentials.setPassword(collection.password);

            header_entries.reserve(collection.headers.size());
            for (const auto & [header_name, header_value] : collection.headers)
                header_entries.emplace_back(header_name, header_value);
        }
        else
        {
            /// Credentials are optional; when the section is absent they stay default-constructed.
            const auto credentials_prefix = http_prefix + ".credentials";
            if (config.has(credentials_prefix))
            {
                credentials.setUsername(config.getString(credentials_prefix + ".user", ""));
                credentials.setPassword(config.getString(credentials_prefix + ".password", ""));
            }

            /// Each child of the headers section contributes one (name, value) pair.
            const auto headers_prefix = http_prefix + ".headers";
            if (config.has(headers_prefix))
            {
                Poco::Util::AbstractConfiguration::Keys header_keys;
                config.keys(headers_prefix, header_keys);

                header_entries.reserve(header_keys.size());
                for (const auto & key : header_keys)
                {
                    const auto entry_prefix = headers_prefix + "." + key;
                    header_entries.emplace_back(
                        config.getString(entry_prefix + ".name", ""),
                        config.getString(entry_prefix + ".value", ""));
                }
            }

            url = config.getString(http_prefix + ".url", "");
            endpoint = config.getString(http_prefix + ".endpoint", "");
            format = config.getString(http_prefix + ".format", "");
        }

        /// Join url and endpoint with a single '/': drop a duplicated slash,
        /// or insert a missing one when the endpoint is non-empty.
        const bool url_has_trailing_slash = url.ends_with('/');
        const bool endpoint_has_leading_slash = endpoint.starts_with('/');
        if (url_has_trailing_slash && endpoint_has_leading_slash)
            url.pop_back();
        else if (!url_has_trailing_slash && !endpoint.empty() && !endpoint_has_leading_slash)
            url.push_back('/');

        /// update_field and update_lag are always read from the config section; update_lag defaults to 1.
        auto configuration = HTTPDictionarySource::Configuration
        {
            .url = url + endpoint,
            .format = format,
            .update_field = config.getString(http_prefix + ".update_field", ""),
            .update_lag = config.getUInt64(http_prefix + ".update_lag", 1),
            .header_entries = std::move(header_entries)
        };

        auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);

        /// URL and header filters are applied only to dictionaries created from DDL.
        if (created_from_ddl)
        {
            context->getRemoteHostFilter().checkURL(Poco::URI(configuration.url));
            context->getHTTPHeaderFilter().checkHeaders(configuration.header_entries);
        }

        return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context);
    };

    factory.registerSource("http", create_table_source);
}
}