2018-11-28 11:37:12 +00:00
|
|
|
#include "HTTPDictionarySource.h"
|
2021-10-15 20:18:20 +00:00
|
|
|
#include <Formats/formatBlock.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <IO/ConnectionTimeouts.h>
|
2020-12-10 22:05:02 +00:00
|
|
|
#include <IO/ConnectionTimeoutsContext.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ReadWriteBufferFromHTTP.h>
|
|
|
|
#include <IO/WriteBufferFromOStream.h>
|
2019-12-05 09:18:51 +00:00
|
|
|
#include <IO/WriteBufferFromString.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2021-08-05 18:08:52 +00:00
|
|
|
#include <Processors/Formats/IInputFormat.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Poco/Net/HTTPRequest.h>
|
2022-04-27 15:05:45 +00:00
|
|
|
#include <Common/logger_useful.h>
|
2018-11-28 11:37:12 +00:00
|
|
|
#include "DictionarySourceFactory.h"
|
2018-12-10 15:25:45 +00:00
|
|
|
#include "DictionarySourceHelpers.h"
|
2018-11-28 11:37:12 +00:00
|
|
|
#include "DictionaryStructure.h"
|
2019-12-15 06:34:43 +00:00
|
|
|
#include "registerDictionaries.h"
|
2018-11-28 11:37:12 +00:00
|
|
|
|
2017-01-21 04:24:28 +00:00
|
|
|
|
2016-11-15 19:51:06 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2020-02-25 18:10:48 +00:00
|
|
|
namespace ErrorCodes
{
    /// Declared here, defined in another translation unit.
    /// Used below when a dictionary configuration with attribute expressions is rejected.
    extern const int LOGICAL_ERROR;
}
|
2021-05-24 21:27:24 +00:00
|
|
|
|
2019-02-10 16:55:12 +00:00
|
|
|
/// Block size passed to `getInputFormat` when the HTTP response is parsed into blocks.
static constexpr UInt64 max_block_size = 8192;
|
2016-12-08 02:49:04 +00:00
|
|
|
|
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
/// Creates an HTTP dictionary source.
///
/// Members are initialised from the corresponding arguments, `update_time` starts at the
/// epoch value (`from_time_t(0)`) and the HTTP timeouts are taken from the context.
/// If `created_from_ddl` is true, the configured URL is handed to the remote host filter
/// of the context before anything else is done (presumably it throws for disallowed hosts).
/// Only the username and the password are taken from `credentials_`.
HTTPDictionarySource::HTTPDictionarySource(
    const DictionaryStructure & dict_struct_,
    const Configuration & configuration_,
    const Poco::Net::HTTPBasicCredentials & credentials_,
    Block & sample_block_,
    ContextPtr context_,
    bool created_from_ddl)
    : log(&Poco::Logger::get("HTTPDictionarySource"))
    , update_time(std::chrono::system_clock::from_time_t(0))
    , dict_struct(dict_struct_)
    , configuration(configuration_)
    , sample_block(sample_block_)
    , context(context_)
    , timeouts(ConnectionTimeouts::getHTTPTimeouts(context))
{
    if (created_from_ddl)
    {
        const Poco::URI source_uri(configuration.url);
        context->getRemoteHostFilter().checkURL(source_uri);
    }

    /// Copy the login data field by field into our own credentials object.
    credentials.setUsername(credentials_.getUsername());
    credentials.setPassword(credentials_.getPassword());
}
|
|
|
|
|
2017-01-21 04:24:28 +00:00
|
|
|
/// Copy constructor.
///
/// Takes over `update_time`, the dictionary structure, the configuration and the sample block
/// from `other`, creates a copy of the context of `other` (`Context::createCopy`) and derives
/// the HTTP timeouts from that copy. Username and password are copied from `other.credentials`.
HTTPDictionarySource::HTTPDictionarySource(const HTTPDictionarySource & other)
    : log(&Poco::Logger::get("HTTPDictionarySource"))
    , update_time(other.update_time)
    , dict_struct(other.dict_struct)
    , configuration(other.configuration)
    , sample_block(other.sample_block)
    , context(Context::createCopy(other.context))
    , timeouts(ConnectionTimeouts::getHTTPTimeouts(context))
{
    const auto & source_credentials = other.credentials;
    credentials.setUsername(source_credentials.getUsername());
    credentials.setPassword(source_credentials.getPassword());
}
|
|
|
|
|
2021-08-05 18:08:52 +00:00
|
|
|
/// Turns an HTTP read buffer into a Pipe that yields parsed blocks.
///
/// The buffer is wrapped with a decompressing buffer whose method is chosen from the path
/// of the configured URL and the compression method reported by the HTTP buffer. The result
/// is read with the input format named in the configuration, using `sample_block` as header.
/// Ownership of the wrapped buffer is passed to the input format via `addBuffer`.
Pipe HTTPDictionarySource::createWrappedBuffer(std::unique_ptr<ReadWriteBufferFromHTTP> http_buffer_ptr)
{
    Poco::URI source_uri(configuration.url);

    /// Has to be queried before `http_buffer_ptr` is moved from.
    String reported_compression = http_buffer_ptr->getCompressionMethod();
    const auto compression_method = chooseCompressionMethod(source_uri.getPath(), reported_compression);

    auto decompressing_buffer = wrapReadBufferWithCompressionMethod(std::move(http_buffer_ptr), compression_method);

    auto input_format = context->getInputFormat(configuration.format, *decompressing_buffer, sample_block, max_block_size);
    input_format->addBuffer(std::move(decompressing_buffer));

    return Pipe(std::move(input_format));
}
|
|
|
|
|
2018-05-16 13:22:27 +00:00
|
|
|
/// Records the current time as the new `update_time` and, unless the stored `update_time`
/// was still the epoch value, appends a query parameter to `uri`:
/// the name is `configuration.update_field`, the value is the previous update time minus
/// `configuration.update_lag`, written as date-time text.
void HTTPDictionarySource::getUpdateFieldAndDate(Poco::URI & uri)
{
    const auto previous_update_time = update_time;
    const bool never_updated = previous_update_time == std::chrono::system_clock::from_time_t(0);

    /// The timestamp is refreshed in either case.
    update_time = std::chrono::system_clock::now();

    if (never_updated)
        return;

    time_t lagged_time = std::chrono::system_clock::to_time_t(previous_update_time) - configuration.update_lag;

    WriteBufferFromOwnString formatted_time;
    writeDateTimeText(lagged_time, formatted_time);
    uri.addQueryParameter(configuration.update_field, formatted_time.str());
}
|
|
|
|
|
2021-08-05 18:08:52 +00:00
|
|
|
/// Issues an HTTP GET request to the configured URL (with a default-constructed output
/// callback) and returns the response as a Pipe, see `createWrappedBuffer`.
Pipe HTTPDictionarySource::loadAll()
{
    LOG_TRACE(log, "loadAll {}", toString());

    Poco::URI request_uri(configuration.url);

    auto http_buffer = std::make_unique<ReadWriteBufferFromHTTP>(
        request_uri,
        Poco::Net::HTTPRequest::HTTP_GET,
        ReadWriteBufferFromHTTP::OutStreamCallback(),
        timeouts,
        credentials,
        0,
        DBMS_DEFAULT_BUFFER_SIZE,
        context->getReadSettings(),
        configuration.header_entries,
        ReadWriteBufferFromHTTP::Range{},
        nullptr,
        false);

    return createWrappedBuffer(std::move(http_buffer));
}
|
2018-01-15 12:44:39 +00:00
|
|
|
|
2021-08-05 18:08:52 +00:00
|
|
|
/// Same request as `loadAll`, except that the URI is first passed through
/// `getUpdateFieldAndDate`, which may append the update-field query parameter
/// and refreshes the stored update time.
Pipe HTTPDictionarySource::loadUpdatedAll()
{
    Poco::URI request_uri(configuration.url);
    getUpdateFieldAndDate(request_uri);

    /// Logged after the query parameter has been added, so the trace shows the final URI.
    LOG_TRACE(log, "loadUpdatedAll {}", request_uri.toString());

    auto http_buffer = std::make_unique<ReadWriteBufferFromHTTP>(
        request_uri,
        Poco::Net::HTTPRequest::HTTP_GET,
        ReadWriteBufferFromHTTP::OutStreamCallback(),
        timeouts,
        credentials,
        0,
        DBMS_DEFAULT_BUFFER_SIZE,
        context->getReadSettings(),
        configuration.header_entries,
        ReadWriteBufferFromHTTP::Range{},
        nullptr,
        false);

    return createWrappedBuffer(std::move(http_buffer));
}
|
|
|
|
|
2021-08-05 18:08:52 +00:00
|
|
|
/// Requests the rows for the given ids.
///
/// A block is built from `ids` with `blockForIds` and sent as the body of an HTTP POST
/// request, serialised in the configured format. The response is returned as a Pipe,
/// see `createWrappedBuffer`.
Pipe HTTPDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
    LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());

    auto ids_block = blockForIds(dict_struct, ids);

    /// The callback keeps its own copy of the block; it writes the block to the request stream.
    ReadWriteBufferFromHTTP::OutStreamCallback write_request_body = [ids_block, this](std::ostream & request_stream)
    {
        WriteBufferFromOStream body_buffer(request_stream);
        auto body_format = context->getOutputFormatParallelIfPossible(configuration.format, body_buffer, ids_block.cloneEmpty());
        formatBlock(body_format, ids_block);
    };

    Poco::URI request_uri(configuration.url);

    auto http_buffer = std::make_unique<ReadWriteBufferFromHTTP>(
        request_uri,
        Poco::Net::HTTPRequest::HTTP_POST,
        write_request_body,
        timeouts,
        credentials,
        0,
        DBMS_DEFAULT_BUFFER_SIZE,
        context->getReadSettings(),
        configuration.header_entries,
        ReadWriteBufferFromHTTP::Range{},
        nullptr,
        false);

    return createWrappedBuffer(std::move(http_buffer));
}
|
|
|
|
|
2021-08-05 18:08:52 +00:00
|
|
|
/// Requests the rows for the given keys.
///
/// A block is built from `key_columns` and `requested_rows` with `blockForKeys` and sent as
/// the body of an HTTP POST request, serialised in the configured format. The response is
/// returned as a Pipe, see `createWrappedBuffer`.
Pipe HTTPDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
    LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size());

    auto keys_block = blockForKeys(dict_struct, key_columns, requested_rows);

    /// The callback keeps its own copy of the block; it writes the block to the request stream.
    ReadWriteBufferFromHTTP::OutStreamCallback write_request_body = [keys_block, this](std::ostream & request_stream)
    {
        WriteBufferFromOStream body_buffer(request_stream);
        auto body_format = context->getOutputFormatParallelIfPossible(configuration.format, body_buffer, keys_block.cloneEmpty());
        formatBlock(body_format, keys_block);
    };

    Poco::URI request_uri(configuration.url);

    auto http_buffer = std::make_unique<ReadWriteBufferFromHTTP>(
        request_uri,
        Poco::Net::HTTPRequest::HTTP_POST,
        write_request_body,
        timeouts,
        credentials,
        0,
        DBMS_DEFAULT_BUFFER_SIZE,
        context->getReadSettings(),
        configuration.header_entries,
        ReadWriteBufferFromHTTP::Range{},
        nullptr,
        false);

    return createWrappedBuffer(std::move(http_buffer));
}
|
|
|
|
|
|
|
|
bool HTTPDictionarySource::isModified() const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
2016-11-15 19:51:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool HTTPDictionarySource::supportsSelectiveLoad() const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
return true;
|
2016-11-15 19:51:06 +00:00
|
|
|
}
|
|
|
|
|
2018-01-15 12:44:39 +00:00
|
|
|
bool HTTPDictionarySource::hasUpdateField() const
|
|
|
|
{
|
2021-06-10 08:34:57 +00:00
|
|
|
return !configuration.update_field.empty();
|
2018-01-15 12:44:39 +00:00
|
|
|
}
|
|
|
|
|
2016-11-15 19:51:06 +00:00
|
|
|
/// Returns a new source created with the copy constructor of this class.
DictionarySourcePtr HTTPDictionarySource::clone() const
{
    const HTTPDictionarySource & self = *this;
    return std::make_shared<HTTPDictionarySource>(self);
}
|
|
|
|
|
|
|
|
std::string HTTPDictionarySource::toString() const
|
|
|
|
{
|
2021-06-10 08:34:57 +00:00
|
|
|
Poco::URI uri(configuration.url);
|
2017-04-01 07:20:54 +00:00
|
|
|
return uri.toString();
|
2016-11-15 19:51:06 +00:00
|
|
|
}
|
|
|
|
|
2018-11-28 11:37:12 +00:00
|
|
|
/// Registers the creator of the `http` dictionary source in `factory`.
///
/// The creator reads the following keys below `<config_prefix>.http`:
///  - `url`, `format`, `update_field` (strings, default empty);
///  - `update_lag` (unsigned integer, default 1);
///  - `credentials.user` / `credentials.password` (optional, default empty);
///  - `headers.<key>.name` / `headers.<key>.value` for every key below `headers` (optional).
///
/// It throws an Exception with code LOGICAL_ERROR if the dictionary structure has attribute expressions.
void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
{
    /// Nothing from the enclosing scope is used, so the lambda captures nothing.
    auto create_table_source = [](const DictionaryStructure & dict_struct,
                                  const Poco::Util::AbstractConfiguration & config,
                                  const std::string & config_prefix,
                                  Block & sample_block,
                                  ContextPtr global_context,
                                  const std::string & /* default_database */,
                                  bool created_from_ddl) -> DictionarySourcePtr
    {
        if (dict_struct.has_expressions)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Dictionary source of type `http` does not support attribute expressions");

        auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);

        const std::string settings_config_prefix = config_prefix + ".http";
        const std::string credentials_prefix = settings_config_prefix + ".credentials";

        Poco::Net::HTTPBasicCredentials credentials;

        if (config.has(credentials_prefix))
        {
            credentials.setUsername(config.getString(credentials_prefix + ".user", ""));
            credentials.setPassword(config.getString(credentials_prefix + ".password", ""));
        }

        const std::string headers_prefix = settings_config_prefix + ".headers";
        ReadWriteBufferFromHTTP::HTTPHeaderEntries header_entries;

        if (config.has(headers_prefix))
        {
            Poco::Util::AbstractConfiguration::Keys config_keys;
            config.keys(headers_prefix, config_keys);

            header_entries.reserve(config_keys.size());
            for (const auto & key : config_keys)
            {
                const std::string entry_prefix = headers_prefix + "." + key;

                /// Not const: both strings are moved into the entry instead of being copied.
                auto header_key = config.getString(entry_prefix + ".name", "");
                auto header_value = config.getString(entry_prefix + ".value", "");
                header_entries.emplace_back(std::make_tuple(std::move(header_key), std::move(header_value)));
            }
        }

        auto configuration = HTTPDictionarySource::Configuration
        {
            .url = config.getString(settings_config_prefix + ".url", ""),
            .format = config.getString(settings_config_prefix + ".format", ""),
            .update_field = config.getString(settings_config_prefix + ".update_field", ""),
            .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1),
            .header_entries = std::move(header_entries) //-V1030
        };

        return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context, created_from_ddl);
    };

    factory.registerSource("http", create_table_source);
}
|
|
|
|
|
2021-08-06 08:41:45 +00:00
|
|
|
}
|