// ClickHouse/src/Dictionaries/FileDictionarySource.cpp
// 111 lines, 3.9 KiB, C++

#include "FileDictionarySource.h"
#include <Poco/File.h>
#include <filesystem>
2017-05-25 19:26:17 +00:00
#include <DataStreams/OwningBlockInputStream.h>
#include <IO/ReadBufferFromFile.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
2020-12-07 23:10:22 +00:00
#include <common/logger_useful.h>
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
2019-12-15 06:34:43 +00:00
#include "registerDictionaries.h"
#include "DictionarySourceHelpers.h"
2016-12-08 02:49:04 +00:00
namespace DB
{
2019-02-10 16:55:12 +00:00
/// Block size handed to Context::getInputFormat() when loadAll() builds the input stream for the file.
static const UInt64 max_block_size = 8192;
2016-12-08 02:49:04 +00:00
/// Error codes referenced in this translation unit. They are only declared here (extern);
/// the definitions live elsewhere in the project.
namespace ErrorCodes
{
    /// Thrown by the "file" source creator when the dictionary structure has attribute expressions.
    extern const int LOGICAL_ERROR;
    /// Thrown by the constructor when the file is not located inside the user files directory.
    extern const int PATH_ACCESS_DENIED;
}
2016-12-08 02:49:04 +00:00
/// Builds a dictionary source backed by a local file.
///
/// @param filepath_      Path of the file holding the dictionary data.
/// @param format_        Name of the input format used to parse the file.
/// @param sample_block_  Block used as the sample when creating the input format.
/// @param context_       Context used to initialize the `context` member.
/// @param check_config   When true, the file must reside inside the user files directory
///                       reported by the context; otherwise PATH_ACCESS_DENIED is thrown.
///
/// Note: std::filesystem::canonical() requires both paths to exist and throws
/// std::filesystem::filesystem_error otherwise.
FileDictionarySource::FileDictionarySource(
    const std::string & filepath_, const std::string & format_,
    Block & sample_block_, const Context & context_, bool check_config)
    : filepath{filepath_}
    , format{format_}
    , sample_block{sample_block_}
    , context(context_)
{
    if (!check_config)
        return;

    namespace fs = std::filesystem;

    /// Resolve symlinks and "."/".." components so the comparison is done on real locations.
    const fs::path canonical_source = fs::canonical(fs::path(filepath));

    String user_files_dir = context.getUserFilesPath();
    const fs::path canonical_user_files = fs::canonical(fs::path(user_files_dir));

    /// Walk both paths component by component. The file is inside the user files directory
    /// exactly when every component of that directory was matched, i.e. the directory is a
    /// component-wise prefix of the file path.
    const auto first_unmatched_dir_component = std::mismatch(
        canonical_source.begin(), canonical_source.end(),
        canonical_user_files.begin(), canonical_user_files.end()).second;

    if (first_unmatched_dir_component != canonical_user_files.end())
        throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_dir);
}
2016-12-08 02:49:04 +00:00
/// Copy constructor: duplicates the path, format, sample block and context of `other`,
/// and carries over its recorded `last_modification` value.
/// No path check is performed here (unlike the main constructor with check_config == true).
FileDictionarySource::FileDictionarySource(const FileDictionarySource & other)
: filepath{other.filepath}
, format{other.format}
, sample_block{other.sample_block}
, context(other.context)
, last_modification{other.last_modification}
{
}
2016-12-08 02:49:04 +00:00
/// Loads the complete contents of the dictionary file.
///
/// Opens `filepath`, creates an input stream for `format` on top of it using `sample_block`
/// and `max_block_size`, and stores the file's current modification time in `last_modification`.
///
/// @return A block input stream over the file; the read buffer is moved into the returned stream.
BlockInputStreamPtr FileDictionarySource::loadAll()
{
    LOG_TRACE(&Poco::Logger::get("FileDictionary"), "loadAll {}", toString());

    auto in_ptr = std::make_unique<ReadBufferFromFile>(filepath);
    auto stream = context.getInputFormat(format, *in_ptr, sample_block, max_block_size);

    /// Record the modification time observed at this load.
    last_modification = getLastModification();

    /// `stream` reads from `*in_ptr`, so ownership of the buffer is handed to the returned wrapper.
    return std::make_shared<OwningBlockInputStream<ReadBuffer>>(stream, std::move(in_ptr));
}
std::string FileDictionarySource::toString() const
{
2020-08-27 18:51:19 +00:00
return fmt::format("File: {}, {}", filepath, format);
2016-12-08 02:49:04 +00:00
}
/// Returns the last-modified timestamp of the file at `filepath`, as reported by Poco::File.
Poco::Timestamp FileDictionarySource::getLastModification() const
{
    return Poco::File{filepath}.getLastModified();
}
/// Registers the "file" dictionary source type in the given factory.
///
/// The registered creator:
///  - throws LOGICAL_ERROR if the dictionary structure has attribute expressions;
///  - reads `<config_prefix>.file.path` and `<config_prefix>.file.format` from the configuration;
///  - derives a context with copyContextAndApplySettings();
///  - returns a FileDictionarySource built from these values, forwarding `check_config`.
void registerDictionarySourceFile(DictionarySourceFactory & factory)
{
    /// The lambda works only with its own parameters, so it captures nothing.
    auto create_table_source = [](const DictionaryStructure & dict_struct,
                                  const Poco::Util::AbstractConfiguration & config,
                                  const std::string & config_prefix,
                                  Block & sample_block,
                                  const Context & context,
                                  const std::string & /* default_database */,
                                  bool check_config) -> DictionarySourcePtr
    {
        if (dict_struct.has_expressions)
            throw Exception{"Dictionary source of type `file` does not support attribute expressions", ErrorCodes::LOGICAL_ERROR};

        const auto filepath = config.getString(config_prefix + ".file.path");
        const auto format = config.getString(config_prefix + ".file.format");

        Context context_local_copy = copyContextAndApplySettings(config_prefix, context, config);

        return std::make_unique<FileDictionarySource>(filepath, format, sample_block, context_local_copy, check_config);
    };

    factory.registerSource("file", create_table_source);
}
2016-12-08 02:49:04 +00:00
}