2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/StorageSet.h>
|
2017-12-30 00:36:06 +00:00
|
|
|
#include <Storages/StorageFactory.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ReadBufferFromFile.h>
|
2018-12-28 18:15:26 +00:00
|
|
|
#include <Compression/CompressedReadBuffer.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/WriteBufferFromFile.h>
|
2018-12-28 18:15:26 +00:00
|
|
|
#include <Compression/CompressedWriteBuffer.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/NativeBlockOutputStream.h>
|
|
|
|
#include <DataStreams/NativeBlockInputStream.h>
|
|
|
|
#include <Common/escapeForFileName.h>
|
2018-01-15 19:07:47 +00:00
|
|
|
#include <Common/StringUtils/StringUtils.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/Set.h>
|
2015-04-16 06:12:35 +00:00
|
|
|
#include <Poco/DirectoryIterator.h>
|
2015-01-27 00:52:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-12-30 00:36:06 +00:00
|
|
|
/// Error codes referenced in this file; they are only declared here.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int INCORRECT_FILE_NAME;
}
|
|
|
|
|
|
|
|
|
2016-10-25 06:49:24 +00:00
|
|
|
class SetOrJoinBlockOutputStream : public IBlockOutputStream
|
|
|
|
{
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
SetOrJoinBlockOutputStream(StorageSetOrJoinBase & table_,
|
|
|
|
const String & backup_path_, const String & backup_tmp_path_, const String & backup_file_name_);
|
2016-10-25 06:49:24 +00:00
|
|
|
|
2018-02-19 00:45:32 +00:00
|
|
|
Block getHeader() const override { return table.getSampleBlock(); }
|
2017-04-01 07:20:54 +00:00
|
|
|
void write(const Block & block) override;
|
|
|
|
void writeSuffix() override;
|
2016-10-25 06:49:24 +00:00
|
|
|
|
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
StorageSetOrJoinBase & table;
|
|
|
|
String backup_path;
|
|
|
|
String backup_tmp_path;
|
|
|
|
String backup_file_name;
|
|
|
|
WriteBufferFromFile backup_buf;
|
|
|
|
CompressedWriteBuffer compressed_backup_buf;
|
|
|
|
NativeBlockOutputStream backup_stream;
|
2016-10-25 06:49:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2015-01-28 00:08:45 +00:00
|
|
|
/// Remembers the backup locations and opens the backup file inside the temporary directory.
SetOrJoinBlockOutputStream::SetOrJoinBlockOutputStream(
    StorageSetOrJoinBase & table_,
    const String & backup_path_,
    const String & backup_tmp_path_,
    const String & backup_file_name_)
    : table(table_)
    , backup_path(backup_path_)
    , backup_tmp_path(backup_tmp_path_)
    , backup_file_name(backup_file_name_)
    /// The file buffer is opened at <tmp path> + <file name>.
    , backup_buf(backup_tmp_path + backup_file_name)
    , compressed_backup_buf(backup_buf)
    , backup_stream(compressed_backup_buf, 0, table.getSampleBlock())
{
}
|
2015-01-27 00:52:03 +00:00
|
|
|
|
2015-01-28 00:08:45 +00:00
|
|
|
void SetOrJoinBlockOutputStream::write(const Block & block)
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Sort columns in the block. This is necessary, since Set and Join count on the same column order in different blocks.
|
|
|
|
Block sorted_block = block.sortColumns();
|
2015-01-30 18:57:44 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
table.insertBlock(sorted_block);
|
|
|
|
backup_stream.write(sorted_block);
|
2015-01-28 00:08:45 +00:00
|
|
|
}
|
2015-01-27 00:52:03 +00:00
|
|
|
|
2015-01-28 00:08:45 +00:00
|
|
|
void SetOrJoinBlockOutputStream::writeSuffix()
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
backup_stream.flush();
|
|
|
|
compressed_backup_buf.next();
|
|
|
|
backup_buf.next();
|
2015-01-27 00:52:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Poco::File(backup_tmp_path + backup_file_name).renameTo(backup_path + backup_file_name);
|
2015-01-28 00:08:45 +00:00
|
|
|
}
|
2015-01-27 00:52:03 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
2017-12-01 21:13:25 +00:00
|
|
|
/// Creates an output stream whose backup file is named after the next value of the counter.
BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const Settings & /*settings*/)
{
    const UInt64 id = ++increment;

    const String tmp_path = path + "tmp/";
    const String file_name = toString(id) + ".bin";

    return std::make_shared<SetOrJoinBlockOutputStream>(*this, path, tmp_path, file_name);
}
|
|
|
|
|
|
|
|
|
2015-01-28 00:08:45 +00:00
|
|
|
/// Stores the table name and derives the data directory of the table.
/// Throws INCORRECT_FILE_NAME when no data path is given.
StorageSetOrJoinBase::StorageSetOrJoinBase(
    const String & path_,
    const String & table_name_,
    const ColumnsDescription & columns_)
    : IStorage{columns_}
    , table_name(table_name_)
{
    if (path_.empty())
    {
        throw Exception("Join and Set storages require data path", ErrorCodes::INCORRECT_FILE_NAME);
    }

    /// Data directory: <path_><escaped table name>/
    path = path_ + escapeForFileName(table_name_) + '/';
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
StorageSet::StorageSet(
|
2017-04-01 07:20:54 +00:00
|
|
|
const String & path_,
|
|
|
|
const String & name_,
|
2018-03-06 20:18:34 +00:00
|
|
|
const ColumnsDescription & columns_)
|
|
|
|
: StorageSetOrJoinBase{path_, name_, columns_},
|
2018-07-02 18:57:14 +00:00
|
|
|
set(std::make_shared<Set>(SizeLimits(), false))
|
2015-01-27 00:52:03 +00:00
|
|
|
{
|
2018-04-19 21:34:04 +00:00
|
|
|
Block header = getSampleBlock();
|
|
|
|
header = header.sortColumns();
|
|
|
|
set->setHeader(header);
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
restore();
|
2015-01-27 00:52:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-07-02 18:57:14 +00:00
|
|
|
/// Forwards the block to the underlying set.
void StorageSet::insertBlock(const Block & block)
{
    set->insertFromBlock(block);
}
|
2018-04-21 00:35:20 +00:00
|
|
|
/// Returns the total row count reported by the underlying set.
size_t StorageSet::getSize() const
{
    return set->getTotalRowCount();
}
|
|
|
|
|
2018-06-09 15:48:22 +00:00
|
|
|
|
2018-12-28 13:39:44 +00:00
|
|
|
void StorageSet::truncate(const ASTPtr &, const Context &)
|
2018-04-21 00:35:20 +00:00
|
|
|
{
|
2018-06-09 18:17:27 +00:00
|
|
|
Poco::File(path).remove(true);
|
|
|
|
Poco::File(path).createDirectories();
|
|
|
|
Poco::File(path + "tmp/").createDirectories();
|
|
|
|
|
2018-05-05 16:14:06 +00:00
|
|
|
Block header = getSampleBlock();
|
|
|
|
header = header.sortColumns();
|
2018-06-09 15:48:22 +00:00
|
|
|
|
2018-04-21 00:35:20 +00:00
|
|
|
increment = 0;
|
2018-07-02 18:57:14 +00:00
|
|
|
set = std::make_shared<Set>(SizeLimits(), false);
|
2018-05-05 16:14:06 +00:00
|
|
|
set->setHeader(header);
|
2018-08-10 04:02:56 +00:00
|
|
|
}
|
2017-01-14 09:00:19 +00:00
|
|
|
|
|
|
|
|
2015-01-28 00:08:45 +00:00
|
|
|
void StorageSetOrJoinBase::restore()
|
2015-01-27 00:52:03 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
Poco::File tmp_dir(path + "tmp/");
|
|
|
|
if (!tmp_dir.exists())
|
|
|
|
{
|
|
|
|
tmp_dir.createDirectories();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const auto file_suffix = ".bin";
|
|
|
|
static const auto file_suffix_size = strlen(".bin");
|
|
|
|
|
|
|
|
Poco::DirectoryIterator dir_end;
|
|
|
|
for (Poco::DirectoryIterator dir_it(path); dir_end != dir_it; ++dir_it)
|
|
|
|
{
|
|
|
|
const auto & name = dir_it.name();
|
|
|
|
|
|
|
|
if (dir_it->isFile()
|
|
|
|
&& endsWith(name, file_suffix)
|
|
|
|
&& dir_it->getSize() > 0)
|
|
|
|
{
|
|
|
|
/// Calculate the maximum number of available files with a backup to add the following files with large numbers.
|
|
|
|
UInt64 file_num = parse<UInt64>(name.substr(0, name.size() - file_suffix_size));
|
|
|
|
if (file_num > increment)
|
|
|
|
increment = file_num;
|
|
|
|
|
|
|
|
restoreFromFile(dir_it->path());
|
|
|
|
}
|
|
|
|
}
|
2015-01-27 00:52:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2015-05-28 03:49:28 +00:00
|
|
|
void StorageSetOrJoinBase::restoreFromFile(const String & file_path)
|
2015-01-27 00:52:03 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
ReadBufferFromFile backup_buf(file_path);
|
|
|
|
CompressedReadBuffer compressed_backup_buf(backup_buf);
|
2018-02-18 02:46:39 +00:00
|
|
|
NativeBlockInputStream backup_stream(compressed_backup_buf, 0);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
backup_stream.readPrefix();
|
|
|
|
while (Block block = backup_stream.read())
|
|
|
|
insertBlock(block);
|
|
|
|
backup_stream.readSuffix();
|
|
|
|
|
|
|
|
/// TODO Add speed, compressed bytes, data volume in memory, compression ratio ... Generalize all statistics logging in project.
|
|
|
|
LOG_INFO(&Logger::get("StorageSetOrJoinBase"), std::fixed << std::setprecision(2)
|
|
|
|
<< "Loaded from backup file " << file_path << ". "
|
|
|
|
<< backup_stream.getProfileInfo().rows << " rows, "
|
|
|
|
<< backup_stream.getProfileInfo().bytes / 1048576.0 << " MiB. "
|
|
|
|
<< "State has " << getSize() << " unique rows.");
|
2015-01-27 00:52:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-12-01 21:13:25 +00:00
|
|
|
void StorageSetOrJoinBase::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name)
|
2015-01-27 00:52:03 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Rename directory with data.
|
|
|
|
String new_path = new_path_to_db + escapeForFileName(new_table_name);
|
|
|
|
Poco::File(path).renameTo(new_path);
|
2015-01-27 00:52:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
path = new_path + "/";
|
2018-03-06 20:18:34 +00:00
|
|
|
table_name = new_table_name;
|
2015-01-27 00:52:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-12-30 00:36:06 +00:00
|
|
|
/// Registers the "Set" table engine in the factory. The engine takes no engine arguments.
void registerStorageSet(StorageFactory & factory)
{
    factory.registerStorage("Set", [](const StorageFactory::Arguments & args)
    {
        const bool has_engine_args = !args.engine_args.empty();
        if (has_engine_args)
        {
            throw Exception(
                "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
        }

        return StorageSet::create(args.data_path, args.table_name, args.columns);
    });
}
|
|
|
|
|
|
|
|
|
2015-01-27 00:52:03 +00:00
|
|
|
}
|