#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NETWORK_ERROR; } IStorageURLBase::IStorageURLBase( const Poco::URI & uri_, ContextPtr /*context_*/, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, const String & compression_method_) : IStorage(table_id_) , uri(uri_) , compression_method(compression_method_) , format_name(format_name_) , format_settings(format_settings_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); } namespace { class StorageURLSource : public SourceWithProgress { public: StorageURLSource(const Poco::URI & uri, const std::string & method, std::function callback, const String & format, const std::optional & format_settings, String name_, const Block & sample_block, ContextPtr context, const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, const CompressionMethod compression_method) : SourceWithProgress(sample_block), name(std::move(name_)) { ReadWriteBufferFromHTTP::HTTPHeaderEntries header; // Propagate OpenTelemetry trace context, if any, downstream. if (CurrentThread::isInitialized()) { const auto & thread_trace_context = CurrentThread::get().thread_trace_context; if (thread_trace_context.trace_id) { header.emplace_back("traceparent", thread_trace_context.composeTraceparentHeader()); if (!thread_trace_context.tracestate.empty()) { header.emplace_back("tracestate", thread_trace_context.tracestate); } } } read_buf = wrapReadBufferWithCompressionMethod( std::make_unique( uri, method, std::move(callback), timeouts, context->getSettingsRef().max_http_get_redirects, Poco::Net::HTTPBasicCredentials{}, DBMS_DEFAULT_BUFFER_SIZE, header, context->getRemoteHostFilter()), compression_method); auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); reader = std::make_shared(input_format); reader = std::make_shared(reader, columns, context); } String getName() const override { return name; } Chunk generate() override { if (!reader) return {}; if (!initialized) reader->readPrefix(); initialized = true; if (auto block = reader->read()) return Chunk(block.getColumns(), block.rows()); reader->readSuffix(); reader.reset(); return {}; } private: String name; std::unique_ptr read_buf; BlockInputStreamPtr reader; bool initialized = false; }; } StorageURLBlockOutputStream::StorageURLBlockOutputStream(const Poco::URI & uri, const String & format, const std::optional & format_settings, const Block & sample_block_, ContextPtr context, const ConnectionTimeouts & timeouts, const CompressionMethod compression_method) : sample_block(sample_block_) { write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_POST, timeouts), compression_method, 3); writer = FormatFactory::instance().getOutputStream(format, *write_buf, sample_block, context, {} /* write callback */, format_settings); } void StorageURLBlockOutputStream::write(const Block & block) { writer->write(block); } void StorageURLBlockOutputStream::writePrefix() { writer->writePrefix(); } void StorageURLBlockOutputStream::writeSuffix() { writer->writeSuffix(); writer->flush(); write_buf->finalize(); } std::string IStorageURLBase::getReadMethod() const { return Poco::Net::HTTPRequest::HTTP_GET; } std::vector> IStorageURLBase::getReadURIParams( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { return {}; } std::function IStorageURLBase::getReadPOSTDataCallback( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { return nullptr; } Pipe IStorageURLBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned /*num_streams*/) { auto request_uri = uri; auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); return Pipe(std::make_shared( request_uri, getReadMethod(), getReadPOSTDataCallback( column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), getHeaderBlock(column_names, metadata_snapshot), local_context, metadata_snapshot->getColumns(), max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), chooseCompressionMethod(request_uri.getPath(), compression_method))); } Pipe StorageURLWithFailover::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned /*num_streams*/) { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); for (const auto & uri_option : uri_options) { auto request_uri = uri_option; for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); try { /// Chech for uri accessibility is done in constructor of ReadWriteBufferFromHTTP while creating StorageURLSource. auto url_source = std::make_shared( request_uri, getReadMethod(), getReadPOSTDataCallback( column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), getHeaderBlock(column_names, metadata_snapshot), local_context, metadata_snapshot->getColumns(), max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), chooseCompressionMethod(request_uri.getPath(), compression_method)); std::shuffle(uri_options.begin(), uri_options.begin(), thread_local_rng); return Pipe(url_source); } catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); } } throw Exception(ErrorCodes::NETWORK_ERROR, "All uri options are unreachable"); } BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { return std::make_shared(uri, format_name, format_settings, metadata_snapshot->getSampleBlock(), context, ConnectionTimeouts::getHTTPTimeouts(context), chooseCompressionMethod(uri.toString(), compression_method)); } StorageURL::StorageURL(const Poco::URI & uri_, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_, const String & compression_method_) : IStorageURLBase(uri_, context_, table_id_, format_name_, format_settings_, columns_, constraints_, compression_method_) { context_->getRemoteHostFilter().checkURL(uri); } StorageURLWithFailover::StorageURLWithFailover( const std::vector & uri_options_, const StorageID & table_id_, const String & format_name_, const std::optional & format_settings_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_, const String & compression_method_) : StorageURL(Poco::URI(), table_id_, format_name_, format_settings_, columns_, constraints_, context_, compression_method_) { for (const auto & uri_option : uri_options_) { Poco::URI uri(uri_option); context_->getRemoteHostFilter().checkURL(uri); uri_options.emplace_back(std::move(uri)); LOG_DEBUG(&Poco::Logger::get("StorageURLDistributed"), "Adding URL option: {}", uri_option); } } FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Arguments & args) { // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. FormatSettings format_settings; if (args.storage_def->settings) { FormatFactorySettings user_format_settings; // Apply changed settings from global context, but ignore the // unknown ones, because we only have the format settings here. const auto & changes = args.getContext()->getSettingsRef().changes(); for (const auto & change : changes) { if (user_format_settings.has(change.name)) { user_format_settings.set(change.name, change.value); } } // Apply changes from SETTINGS clause, with validation. user_format_settings.applyChanges(args.storage_def->settings->changes); format_settings = getFormatSettings(args.getContext(), user_format_settings); } else { format_settings = getFormatSettings(args.getContext()); } return format_settings; } void registerStorageURL(StorageFactory & factory) { factory.registerStorage("URL", [](const StorageFactory::Arguments & args) { ASTs & engine_args = args.engine_args; if (engine_args.size() != 2 && engine_args.size() != 3) throw Exception( "Storage URL requires 2 or 3 arguments: url, name of used format and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); const String & url = engine_args[0]->as().value.safeGet(); Poco::URI uri(url); engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); const String & format_name = engine_args[1]->as().value.safeGet(); String compression_method = "auto"; if (engine_args.size() == 3) { engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); compression_method = engine_args[2]->as().value.safeGet(); } auto format_settings = StorageURL::getFormatSettingsFromArgs(args); return StorageURL::create( uri, args.table_id, format_name, format_settings, args.columns, args.constraints, args.getContext(), compression_method); }, { .supports_settings = true, .source_access_type = AccessType::URL, }); } }