2018-11-28 11:37:12 +00:00
|
|
|
#include "FileDictionarySource.h"
|
2021-04-07 18:38:24 +00:00
|
|
|
|
|
|
|
#include <Poco/File.h>
|
|
|
|
#include <filesystem>
|
|
|
|
|
2017-05-25 19:26:17 +00:00
|
|
|
#include <DataStreams/OwningBlockInputStream.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ReadBufferFromFile.h>
|
2018-12-10 15:25:45 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2019-12-12 08:57:25 +00:00
|
|
|
#include <Common/StringUtils/StringUtils.h>
|
2020-12-07 23:10:22 +00:00
|
|
|
#include <common/logger_useful.h>
|
2018-11-28 11:37:12 +00:00
|
|
|
#include "DictionarySourceFactory.h"
|
|
|
|
#include "DictionaryStructure.h"
|
2019-12-15 06:34:43 +00:00
|
|
|
#include "registerDictionaries.h"
|
2020-04-03 21:32:06 +00:00
|
|
|
#include "DictionarySourceHelpers.h"
|
2016-12-08 02:49:04 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2019-02-10 16:55:12 +00:00
|
|
|
/// Block size passed to the input format when reading the dictionary file.
static constexpr UInt64 max_block_size = 8192;
|
2016-12-08 02:49:04 +00:00
|
|
|
|
2019-12-10 17:27:29 +00:00
|
|
|
namespace ErrorCodes
{
    /// Declarations only; the codes used by this file are defined elsewhere.
    extern const int LOGICAL_ERROR;
    extern const int PATH_ACCESS_DENIED;
}
|
|
|
|
|
2016-12-08 02:49:04 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
FileDictionarySource::FileDictionarySource(
|
2019-12-10 17:27:29 +00:00
|
|
|
const std::string & filepath_, const std::string & format_,
|
2021-04-10 23:33:54 +00:00
|
|
|
Block & sample_block_, ContextPtr context_, bool check_config)
|
2019-12-10 17:27:29 +00:00
|
|
|
: filepath{filepath_}
|
|
|
|
, format{format_}
|
|
|
|
, sample_block{sample_block_}
|
|
|
|
, context(context_)
|
2018-12-10 15:25:45 +00:00
|
|
|
{
|
2019-12-10 17:27:29 +00:00
|
|
|
if (check_config)
|
|
|
|
{
|
2021-04-07 18:38:24 +00:00
|
|
|
auto source_file_path = std::filesystem::path(filepath);
|
|
|
|
auto source_file_absolute_path = std::filesystem::canonical(source_file_path);
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
String user_files_path_string_value = context->getUserFilesPath();
|
2021-04-07 18:38:24 +00:00
|
|
|
auto user_files_path = std::filesystem::path(user_files_path_string_value);
|
|
|
|
auto user_files_absolute_path = std::filesystem::canonical(user_files_path);
|
|
|
|
|
|
|
|
auto [_, user_files_absolute_path_mismatch_it] = std::mismatch(source_file_absolute_path.begin(), source_file_absolute_path.end(), user_files_absolute_path.begin(), user_files_absolute_path.end());
|
|
|
|
|
|
|
|
bool user_files_absolute_path_include_source_file_absolute_path = user_files_absolute_path_mismatch_it == user_files_absolute_path.end();
|
|
|
|
|
|
|
|
if (!user_files_absolute_path_include_source_file_absolute_path)
|
|
|
|
throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path_string_value);
|
2019-12-10 17:27:29 +00:00
|
|
|
}
|
2018-12-10 15:25:45 +00:00
|
|
|
}
|
2016-12-08 02:49:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
FileDictionarySource::FileDictionarySource(const FileDictionarySource & other)
|
2019-12-10 17:27:29 +00:00
|
|
|
: filepath{other.filepath}
|
2018-12-10 15:25:45 +00:00
|
|
|
, format{other.format}
|
|
|
|
, sample_block{other.sample_block}
|
2021-04-10 23:33:54 +00:00
|
|
|
, context(Context::createCopy(other.context))
|
2018-12-10 15:25:45 +00:00
|
|
|
, last_modification{other.last_modification}
|
|
|
|
{
|
|
|
|
}
|
2016-12-08 02:49:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
/// Opens the source file and returns a stream that reads it in the configured format.
/// The returned stream owns the read buffer. As a side effect, `last_modification`
/// is refreshed from the file after the input stream has been created.
BlockInputStreamPtr FileDictionarySource::loadAll()
{
    Poco::Logger * log = &Poco::Logger::get("FileDictionary");
    LOG_TRACE(log, "loadAll {}", toString());

    auto file_buffer = std::make_unique<ReadBufferFromFile>(filepath);
    auto format_stream = context->getInputFormat(format, *file_buffer, sample_block, max_block_size);

    last_modification = getLastModification();

    /// Ownership of the buffer is handed over to the stream wrapper.
    return std::make_shared<OwningBlockInputStream<ReadBuffer>>(format_stream, std::move(file_buffer));
}
|
|
|
|
|
|
|
|
|
|
|
|
std::string FileDictionarySource::toString() const
|
|
|
|
{
|
2020-08-27 18:51:19 +00:00
|
|
|
return fmt::format("File: {}, {}", filepath, format);
|
2016-12-08 02:49:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns the last modification time of the source file as reported by Poco::File.
Poco::Timestamp FileDictionarySource::getLastModification() const
{
    Poco::File source_file{filepath};
    return source_file.getLastModified();
}
|
|
|
|
|
2018-11-28 11:37:12 +00:00
|
|
|
/// Registers the "file" dictionary source in `factory`.
/// The registered creator reads `<config_prefix>.file.path` and `<config_prefix>.file.format`
/// from the configuration and builds a FileDictionarySource from them.
void registerDictionarySourceFile(DictionarySourceFactory & factory)
{
    /// The creator uses nothing from the enclosing scope, so it captures nothing.
    auto create_file_source = [](const DictionaryStructure & dict_struct,
                                 const Poco::Util::AbstractConfiguration & config,
                                 const std::string & config_prefix,
                                 Block & sample_block,
                                 ContextPtr context,
                                 const std::string & /* default_database */,
                                 bool check_config) -> DictionarySourcePtr
    {
        if (dict_struct.has_expressions)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Dictionary source of type `file` does not support attribute expressions");

        const std::string source_path = config.getString(config_prefix + ".file.path");
        const std::string source_format = config.getString(config_prefix + ".file.format");

        /// The source gets its own context copy, produced by copyContextAndApplySettings.
        auto source_context = copyContextAndApplySettings(config_prefix, context, config);

        return std::make_unique<FileDictionarySource>(source_path, source_format, sample_block, source_context, check_config);
    };

    factory.registerSource("file", create_file_source);
}
|
|
|
|
|
2016-12-08 02:49:04 +00:00
|
|
|
}
|