ClickHouse/src/Storages/StorageSet.cpp

265 lines
8.3 KiB
C++
Raw Normal View History

#include <Storages/StorageSet.h>
#include <Storages/StorageFactory.h>
#include <IO/ReadBufferFromFile.h>
2018-12-28 18:15:26 +00:00
#include <Compression/CompressedReadBuffer.h>
#include <IO/WriteBufferFromFile.h>
2018-12-28 18:15:26 +00:00
#include <Compression/CompressedWriteBuffer.h>
2021-10-08 17:21:19 +00:00
#include <DataStreams/NativeWriter.h>
#include <DataStreams/NativeReader.h>
2021-10-14 10:25:43 +00:00
#include <DataStreams/BlockStreamProfileInfo.h>
2020-12-15 16:45:13 +00:00
#include <Disks/IDisk.h>
2020-05-23 17:25:19 +00:00
#include <Common/formatReadable.h>
#include <Common/StringUtils/StringUtils.h>
#include <Interpreters/Set.h>
#include <Interpreters/Context.h>
#include <Processors/Sinks/SinkToStorage.h>
2020-09-13 15:11:18 +00:00
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTLiteral.h>
2021-06-18 11:07:41 +00:00
#include <filesystem>
namespace fs = std::filesystem;
2015-01-27 00:52:03 +00:00
namespace DB
{
/// Error codes used in this file. They are only declared here (`extern`).
namespace ErrorCodes
{
    extern const int INCORRECT_FILE_NAME;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
2021-07-23 14:25:35 +00:00
class SetOrJoinSink : public SinkToStorage
2016-10-25 06:49:24 +00:00
{
public:
2021-07-23 14:25:35 +00:00
SetOrJoinSink(
StorageSetOrJoinBase & table_, const StorageMetadataPtr & metadata_snapshot_,
const String & backup_path_, const String & backup_tmp_path_,
2020-09-18 12:58:27 +00:00
const String & backup_file_name_, bool persistent_);
2016-10-25 06:49:24 +00:00
2021-07-23 19:33:59 +00:00
String getName() const override { return "SetOrJoinSink"; }
2021-07-23 14:25:35 +00:00
void consume(Chunk chunk) override;
void onFinish() override;
2016-10-25 06:49:24 +00:00
private:
StorageSetOrJoinBase & table;
StorageMetadataPtr metadata_snapshot;
String backup_path;
String backup_tmp_path;
String backup_file_name;
2020-12-15 16:45:13 +00:00
std::unique_ptr<WriteBufferFromFileBase> backup_buf;
CompressedWriteBuffer compressed_backup_buf;
2021-10-08 17:21:19 +00:00
NativeWriter backup_stream;
2020-09-18 12:58:27 +00:00
bool persistent;
2016-10-25 06:49:24 +00:00
};
2021-07-23 14:25:35 +00:00
/// Opens the temporary backup file `backup_tmp_path_ / backup_file_name_` on the table's disk
/// and chains a compressed buffer and a native writer on top of it.
/// Note that the file is opened regardless of the value of `persistent_`.
SetOrJoinSink::SetOrJoinSink(
    StorageSetOrJoinBase & table_,
    const StorageMetadataPtr & metadata_snapshot_,
    const String & backup_path_,
    const String & backup_tmp_path_,
    const String & backup_file_name_,
    bool persistent_)
    : SinkToStorage(metadata_snapshot_->getSampleBlock())
    , table(table_)
    , metadata_snapshot(metadata_snapshot_)
    , backup_path(backup_path_)
    , backup_tmp_path(backup_tmp_path_)
    , backup_file_name(backup_file_name_)
    , backup_buf(table_.disk->writeFile(fs::path(backup_tmp_path_) / backup_file_name_))
    , compressed_backup_buf(*backup_buf)
    , backup_stream(compressed_backup_buf, 0, metadata_snapshot_->getSampleBlock())
    , persistent(persistent_)
{
}
2015-01-27 00:52:03 +00:00
2021-07-23 14:25:35 +00:00
/// Inserts one chunk into the table and, for persistent tables, appends it to the backup stream.
void SetOrJoinSink::consume(Chunk chunk)
{
    /// Set and Join rely on the same column order in every block, so the columns are sorted first.
    const Block unordered = getHeader().cloneWithColumns(chunk.detachColumns());
    const Block ordered = unordered.sortColumns();

    table.insertBlock(ordered);

    if (!persistent)
        return;

    backup_stream.write(ordered);
}
2015-01-27 00:52:03 +00:00
2021-07-23 14:25:35 +00:00
void SetOrJoinSink::onFinish()
{
2019-11-01 10:58:29 +00:00
table.finishInsert();
2020-09-18 12:58:27 +00:00
if (persistent)
2020-09-13 15:11:18 +00:00
{
backup_stream.flush();
compressed_backup_buf.next();
2020-12-15 16:45:13 +00:00
backup_buf->next();
backup_buf->finalize();
2015-01-27 00:52:03 +00:00
2021-06-18 11:07:41 +00:00
table.disk->replaceFile(fs::path(backup_tmp_path) / backup_file_name, fs::path(backup_path) / backup_file_name);
2020-09-13 15:11:18 +00:00
}
}
2015-01-27 00:52:03 +00:00
2021-07-23 14:25:35 +00:00
/// Creates a sink for one insert. Each insert gets its own backup file named `<id>.bin`,
/// where `id` is the next value of the `increment` counter; the file is first written under `tmp/`.
SinkToStoragePtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/)
{
    const UInt64 id = ++increment;

    const String tmp_dir = fs::path(path) / "tmp/";
    const String file_name = toString(id) + ".bin";

    return std::make_shared<SetOrJoinSink>(*this, metadata_snapshot, path, tmp_dir, file_name, persistent);
}
/// Sets up the in-memory metadata (columns, constraints, comment) and remembers the data path.
/// Throws INCORRECT_FILE_NAME if `relative_path_` is empty.
StorageSetOrJoinBase::StorageSetOrJoinBase(
    DiskPtr disk_,
    const String & relative_path_,
    const StorageID & table_id_,
    const ColumnsDescription & columns_,
    const ConstraintsDescription & constraints_,
    const String & comment,
    bool persistent_)
    : IStorage(table_id_)
    , disk(disk_)
    , persistent(persistent_)
{
    StorageInMemoryMetadata metadata;
    metadata.setColumns(columns_);
    metadata.setConstraints(constraints_);
    metadata.setComment(comment);
    setInMemoryMetadata(metadata);

    if (relative_path_.empty())
        throw Exception("Join and Set storages require data path", ErrorCodes::INCORRECT_FILE_NAME);

    path = relative_path_;
}
/// Creates an empty Set whose header is the table's sample block with sorted columns,
/// then loads previously saved data via restore().
StorageSet::StorageSet(
    DiskPtr disk_,
    const String & relative_path_,
    const StorageID & table_id_,
    const ColumnsDescription & columns_,
    const ConstraintsDescription & constraints_,
    const String & comment,
    bool persistent_)
    : StorageSetOrJoinBase{disk_, relative_path_, table_id_, columns_, constraints_, comment, persistent_}
    , set(std::make_shared<Set>(SizeLimits(), false, true))
{
    const Block sorted_header = getInMemoryMetadataPtr()->getSampleBlock().sortColumns();
    set->setHeader(sorted_header.getColumnsWithTypeAndName());

    restore();
}
/// Adds the columns of `block` to the underlying set.
void StorageSet::insertBlock(const Block & block)
{
    set->insertFromBlock(block.getColumnsWithTypeAndName());
}
2019-11-01 10:58:29 +00:00
/// Forwards the end-of-insert notification to the underlying set.
void StorageSet::finishInsert()
{
    set->finishInsert();
}
2018-04-21 00:35:20 +00:00
/// Total row count reported by the underlying set.
size_t StorageSet::getSize() const
{
    return set->getTotalRowCount();
}
2020-11-25 13:47:32 +00:00
/// Total row count reported by the underlying set; the settings argument is not used.
std::optional<UInt64> StorageSet::totalRows(const Settings &) const
{
    return set->getTotalRowCount();
}
/// Total byte count reported by the underlying set; the settings argument is not used.
std::optional<UInt64> StorageSet::totalBytes(const Settings &) const
{
    return set->getTotalByteCount();
}
2018-06-09 15:48:22 +00:00
void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &)
2018-04-21 00:35:20 +00:00
{
2020-12-15 16:45:13 +00:00
disk->removeRecursive(path);
disk->createDirectories(path);
2021-06-18 11:07:41 +00:00
disk->createDirectories(fs::path(path) / "tmp/");
Block header = metadata_snapshot->getSampleBlock();
header = header.sortColumns();
2018-06-09 15:48:22 +00:00
2018-04-21 00:35:20 +00:00
increment = 0;
set = std::make_shared<Set>(SizeLimits(), false, true);
set->setHeader(header.getColumnsWithTypeAndName());
}
Squashed commit of the following: commit e712f469a55ff34ad34b482b15cc4153b7ad7233 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:59:13 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a002823084e3a79bffcc17d479620a68eb0644b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:58:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9e06f407c8ee781ed8ddf98bdfcc31846bf2a0fe Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:55:14 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 9581620f1e839f456fa7894aa1f996d5162ac6cd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:54:22 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2a8564c68cb6cc3649fafaf401256d43c9a2e777 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:47:34 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit cf60632d78ec656be3304ef4565e859bb6ce80ba Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:40:09 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit ee3d1dc6e0c4ca60e3ac1e0c30d4b3ed1e66eca0 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:22:49 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 65592ef7116a90104fcd524b53ef8b7cf22640f2 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:18:17 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 37972c257320d3b7e7b294e0fdeffff218647bfd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:17:06 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit dd909d149974ce5bed2456de1261aa5a368fd3ff Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:16:28 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 3cf43266ca7e30adf01212b1a739ba5fe43639fd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:15:42 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 6731a3df96d1609286e2536b6432916af7743f0f Author: Alexey Milovidov 
<milovidov@yandex-team.ru> Date: Sat Jan 14 11:13:35 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 1b5727e0d56415b7add4cb76110105358663602c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:11:18 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit bbcf726a55685b8e72f5b40ba0bf1904bd1c0407 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:09:04 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit c03b477d5e2e65014e8906ecfa2efb67ee295af1 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:06:30 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 2986e2fb0466bc18d73693dcdded28fccc0dc66b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:05:44 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit 5d6cdef13d2e02bd5c4954983334e9162ab2635b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:04:53 2017 +0300 Less dependencies [#CLICKHOUSE-2] commit f2b819b25ce8b2ccdcb201eefb03e1e6f5aab590 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Jan 14 11:01:47 2017 +0300 Less dependencies [#CLICKHOUSE-2]
2017-01-14 09:00:19 +00:00
void StorageSetOrJoinBase::restore()
2015-01-27 00:52:03 +00:00
{
2021-06-18 11:07:41 +00:00
if (!disk->exists(fs::path(path) / "tmp/"))
{
2021-06-18 11:07:41 +00:00
disk->createDirectories(fs::path(path) / "tmp/");
return;
}
2020-04-22 06:22:14 +00:00
static const char * file_suffix = ".bin";
static const auto file_suffix_size = strlen(".bin");
2020-12-15 16:45:13 +00:00
for (auto dir_it{disk->iterateDirectory(path)}; dir_it->isValid(); dir_it->next())
{
2020-12-15 16:45:13 +00:00
const auto & name = dir_it->name();
const auto & file_path = dir_it->path();
2020-12-15 16:45:13 +00:00
if (disk->isFile(file_path)
&& endsWith(name, file_suffix)
2020-12-15 16:45:13 +00:00
&& disk->getFileSize(file_path) > 0)
{
/// Calculate the maximum number of available files with a backup to add the following files with large numbers.
UInt64 file_num = parse<UInt64>(name.substr(0, name.size() - file_suffix_size));
if (file_num > increment)
increment = file_num;
restoreFromFile(dir_it->path());
}
}
2015-01-27 00:52:03 +00:00
}
void StorageSetOrJoinBase::restoreFromFile(const String & file_path)
2015-01-27 00:52:03 +00:00
{
2020-12-15 16:45:13 +00:00
auto backup_buf = disk->readFile(file_path);
CompressedReadBuffer compressed_backup_buf(*backup_buf);
2021-10-08 17:21:19 +00:00
NativeReader backup_stream(compressed_backup_buf, 0);
2019-11-01 10:58:29 +00:00
2021-10-08 17:21:19 +00:00
BlockStreamProfileInfo info;
while (Block block = backup_stream.read())
2021-10-08 17:21:19 +00:00
{
info.update(block);
insertBlock(block);
2021-10-08 17:21:19 +00:00
}
2019-11-01 10:58:29 +00:00
finishInsert();
/// TODO Add speed, compressed bytes, data volume in memory, compression ratio ... Generalize all statistics logging in project.
2020-05-30 21:57:37 +00:00
LOG_INFO(&Poco::Logger::get("StorageSetOrJoinBase"), "Loaded from backup file {}. {} rows, {}. State has {} unique rows.",
2021-10-08 17:21:19 +00:00
file_path, info.rows, ReadableSize(info.bytes), getSize());
2015-01-27 00:52:03 +00:00
}
2020-04-07 14:05:51 +00:00
void StorageSetOrJoinBase::rename(const String & new_path_to_table_data, const StorageID & new_table_id)
2015-01-27 00:52:03 +00:00
{
/// Rename directory with data.
2020-12-15 16:45:13 +00:00
disk->replaceFile(path, new_path_to_table_data);
2015-01-27 00:52:03 +00:00
2020-12-15 16:45:13 +00:00
path = new_path_to_table_data;
2020-04-07 14:05:51 +00:00
renameInMemory(new_table_id);
2015-01-27 00:52:03 +00:00
}
/// Registers the "Set" table engine. The engine takes no arguments but supports settings;
/// the `disk` and `persistent` settings are read here.
void registerStorageSet(StorageFactory & factory)
{
    auto create_set = [](const StorageFactory::Arguments & args)
    {
        if (!args.engine_args.empty())
            throw Exception(
                "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        /// Settings keep their default values unless the query has a SETTINGS clause.
        SetSettings set_settings;
        if (args.storage_def->settings)
            set_settings.loadFromQuery(*args.storage_def);

        DiskPtr disk = args.getContext()->getDisk(set_settings.disk);

        return StorageSet::create(
            disk, args.relative_data_path, args.table_id, args.columns, args.constraints, args.comment, set_settings.persistent);
    };

    factory.registerStorage("Set", create_set, StorageFactory::StorageFeatures{ .supports_settings = true, });
}
2015-01-27 00:52:03 +00:00
}