Keep indices for StorageStripeLog in memory.

This commit is contained in:
Vitaly Baranov 2021-08-27 01:15:24 +03:00
parent 42596b16bc
commit 0e8c9b089f
10 changed files with 359 additions and 175 deletions

View File

@ -50,11 +50,6 @@ void FileChecker::setEmpty(const String & full_file_path)
map[fileName(full_file_path)] = 0;
}
const FileChecker::Map & FileChecker::getFileSizes() const
{
return map;
}
size_t FileChecker::getFileSize(const String & full_file_path) const
{
auto it = map.find(fileName(full_file_path));

View File

@ -28,11 +28,6 @@ public:
/// The purpose of this function is to rollback a group of unfinished writes.
void repair();
/// File name -> size.
using Map = std::map<String, UInt64>;
const Map & getFileSizes() const;
/// Returns stored file size.
size_t getFileSize(const String & full_file_path) const;
@ -43,7 +38,7 @@ private:
const Poco::Logger * log = &Poco::Logger::get("FileChecker");
String files_info_path;
Map map;
std::map<String, size_t> map;
};
}

View File

@ -0,0 +1,91 @@
#include <Formats/IndexForNativeFormat.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_INDEX;
}
void IndexOfBlockForNativeFormat::read(ReadBuffer & istr)
{
readVarUInt(num_columns, istr);
readVarUInt(num_rows, istr);
columns.clear();
for (size_t i = 0; i < num_columns; ++i)
{
auto & column = columns.emplace_back();
readBinary(column.name, istr);
readBinary(column.type, istr);
readBinary(column.location.offset_in_compressed_file, istr);
readBinary(column.location.offset_in_decompressed_block, istr);
}
}
void IndexOfBlockForNativeFormat::write(WriteBuffer & ostr) const
{
writeVarUInt(num_columns, ostr);
writeVarUInt(num_rows, ostr);
for (size_t i = 0; i < num_columns; ++i)
{
const auto & column = columns[i];
writeBinary(column.name, ostr);
writeBinary(column.type, ostr);
writeBinary(column.location.offset_in_compressed_file, ostr);
writeBinary(column.location.offset_in_decompressed_block, ostr);
}
}
IndexOfBlockForNativeFormat IndexOfBlockForNativeFormat::extractIndexForColumns(const NameSet & required_columns) const
{
if (num_columns < required_columns.size())
throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX);
IndexOfBlockForNativeFormat res;
for (size_t i = 0; i < num_columns; ++i)
{
const auto & column = columns[i];
if (required_columns.contains(column.name))
res.columns.push_back(column);
}
if (res.columns.size() < required_columns.size())
throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX);
if (res.columns.size() > required_columns.size())
throw Exception("Index contain duplicate columns", ErrorCodes::INCORRECT_INDEX);
res.num_columns = res.columns.size();
res.num_rows = num_rows;
return res;
}
void IndexForNativeFormat::read(ReadBuffer & istr)
{
blocks.clear();
while (!istr.eof())
{
auto & block = blocks.emplace_back();
block.read(istr);
}
}
void IndexForNativeFormat::write(WriteBuffer & ostr) const
{
for (const auto & block : blocks)
block.write(ostr);
}
IndexForNativeFormat IndexForNativeFormat::extractIndexForColumns(const NameSet & required_columns) const
{
IndexForNativeFormat res;
res.blocks.reserve(blocks.size());
for (const auto & block : blocks)
res.blocks.emplace_back(block.extractIndexForColumns(required_columns));
return res;
}
}

View File

@ -0,0 +1,60 @@
#pragma once
#include <Core/Names.h>
#include <Formats/MarkInCompressedFile.h>
namespace DB
{
/** The Native format can contain a separately located index,
* which allows you to understand where what column is located,
* and skip unnecessary columns.
*/
/** The position of one piece of a single column. */
struct IndexOfOneColumnForNativeFormat
{
String name;
String type;
MarkInCompressedFile location;
};
/** The index for the data block. */
struct IndexOfBlockForNativeFormat
{
using Columns = std::vector<IndexOfOneColumnForNativeFormat>;
size_t num_columns;
size_t num_rows;
Columns columns;
/// Reads the index for the data block.
void read(ReadBuffer & istr);
/// Writes the index for the data block.
void write(WriteBuffer & ostr) const;
/// Returns the index only for the required columns.
IndexOfBlockForNativeFormat extractIndexForColumns(const NameSet & required_columns) const;
};
/** The whole index. */
struct IndexForNativeFormat
{
using Blocks = std::vector<IndexOfBlockForNativeFormat>;
Blocks blocks;
bool empty() const { return blocks.empty(); }
void clear() { blocks.clear(); }
/// Reads the index.
void read(ReadBuffer & istr);
/// Writes the index.
void write(WriteBuffer & ostr) const;
/// Returns the index only for the required columns.
IndexForNativeFormat extractIndexForColumns(const NameSet & required_columns) const;
};
}

View File

@ -221,39 +221,4 @@ void NativeReader::updateAvgValueSizeHints(const Block & block)
}
}
void IndexForNativeFormat::read(ReadBuffer & istr, const NameSet & required_columns)
{
while (!istr.eof())
{
blocks.emplace_back();
IndexOfBlockForNativeFormat & block = blocks.back();
readVarUInt(block.num_columns, istr);
readVarUInt(block.num_rows, istr);
if (block.num_columns < required_columns.size())
throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX);
for (size_t i = 0; i < block.num_columns; ++i)
{
IndexOfOneColumnForNativeFormat column_index;
readBinary(column_index.name, istr);
readBinary(column_index.type, istr);
readBinary(column_index.location.offset_in_compressed_file, istr);
readBinary(column_index.location.offset_in_decompressed_block, istr);
if (required_columns.count(column_index.name))
block.columns.push_back(std::move(column_index));
}
if (block.columns.size() < required_columns.size())
throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX);
if (block.columns.size() > required_columns.size())
throw Exception("Index contain duplicate columns", ErrorCodes::INCORRECT_INDEX);
block.num_columns = block.columns.size();
}
}
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <Formats/IndexForNativeFormat.h>
#include <Formats/MarkInCompressedFile.h>
#include <Common/PODArray.h>
#include <Core/Block.h>
@ -9,48 +10,6 @@ namespace DB
class CompressedReadBufferFromFile;
/** The Native format can contain a separately located index,
* which allows you to understand where what column is located,
* and skip unnecessary columns.
*/
/** The position of one piece of a single column. */
struct IndexOfOneColumnForNativeFormat
{
String name;
String type;
MarkInCompressedFile location;
};
/** The index for the data block. */
struct IndexOfBlockForNativeFormat
{
using Columns = std::vector<IndexOfOneColumnForNativeFormat>;
size_t num_columns;
size_t num_rows;
Columns columns;
};
/** The whole index. */
struct IndexForNativeFormat
{
using Blocks = std::vector<IndexOfBlockForNativeFormat>;
Blocks blocks;
IndexForNativeFormat() {}
IndexForNativeFormat(ReadBuffer & istr, const NameSet & required_columns)
{
read(istr, required_columns);
}
/// Read the index, only for the required columns.
void read(ReadBuffer & istr, const NameSet & required_columns);
};
/** Deserializes the stream of blocks from the native binary format (with names and column types).
* Designed for communication between servers.
*

View File

@ -5,6 +5,7 @@
#include <IO/VarInt.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Formats/IndexForNativeFormat.h>
#include <Formats/MarkInCompressedFile.h>
#include <Formats/NativeWriter.h>
@ -22,11 +23,11 @@ namespace ErrorCodes
NativeWriter::NativeWriter(
WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_,
WriteBuffer * index_ostr_, size_t initial_size_of_file_)
IndexForNativeFormat * index_, size_t initial_size_of_file_)
: ostr(ostr_), client_revision(client_revision_), header(header_),
index_ostr(index_ostr_), initial_size_of_file(initial_size_of_file_), remove_low_cardinality(remove_low_cardinality_)
index(index_), initial_size_of_file(initial_size_of_file_), remove_low_cardinality(remove_low_cardinality_)
{
if (index_ostr)
if (index)
{
ostr_concrete = typeid_cast<CompressedWriteBuffer *>(&ostr);
if (!ostr_concrete)
@ -80,18 +81,20 @@ void NativeWriter::write(const Block & block)
/** The index has the same structure as the data stream.
* But instead of column values, it contains a mark that points to the location in the data file where this part of the column is located.
*/
if (index_ostr)
IndexOfBlockForNativeFormat index_block;
if (index)
{
writeVarUInt(columns, *index_ostr);
writeVarUInt(rows, *index_ostr);
index_block.num_columns = columns;
index_block.num_rows = rows;
index_block.columns.resize(columns);
}
for (size_t i = 0; i < columns; ++i)
{
/// For the index.
MarkInCompressedFile mark;
MarkInCompressedFile mark{0, 0};
if (index_ostr)
if (index)
{
ostr_concrete->next(); /// Finish compressed block.
mark.offset_in_compressed_file = initial_size_of_file + ostr_concrete->getCompressedBytes();
@ -125,15 +128,17 @@ void NativeWriter::write(const Block & block)
if (rows) /// Zero items of data is always represented as zero number of bytes.
writeData(*column.type, column.column, ostr, 0, 0);
if (index_ostr)
if (index)
{
writeStringBinary(column.name, *index_ostr);
writeStringBinary(column.type->getName(), *index_ostr);
writeBinary(mark.offset_in_compressed_file, *index_ostr);
writeBinary(mark.offset_in_decompressed_block, *index_ostr);
index_block.columns[i].name = column.name;
index_block.columns[i].type = column.type->getName();
index_block.columns[i].location.offset_in_compressed_file = mark.offset_in_compressed_file;
index_block.columns[i].location.offset_in_decompressed_block = mark.offset_in_decompressed_block;
}
}
if (index)
index->blocks.emplace_back(std::move(index_block));
}
}

View File

@ -9,7 +9,7 @@ namespace DB
class WriteBuffer;
class CompressedWriteBuffer;
struct IndexForNativeFormat;
/** Serializes the stream of blocks in their native binary format (with names and column types).
* Designed for communication between servers.
@ -24,7 +24,7 @@ public:
*/
NativeWriter(
WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_, bool remove_low_cardinality_ = false,
WriteBuffer * index_ostr_ = nullptr, size_t initial_size_of_file_ = 0);
IndexForNativeFormat * index_ = nullptr, size_t initial_size_of_file_ = 0);
Block getHeader() const { return header; }
void write(const Block & block);
@ -36,7 +36,7 @@ private:
WriteBuffer & ostr;
UInt64 client_revision;
Block header;
WriteBuffer * index_ostr;
IndexForNativeFormat * index = nullptr;
size_t initial_size_of_file; /// The initial size of the data file, if `append` done. Used for the index.
/// If you need to write index, then `ostr` must be a CompressedWriteBuffer.
CompressedWriteBuffer * ostr_concrete = nullptr;

View File

@ -47,11 +47,13 @@ namespace ErrorCodes
}
/// NOTE: The lock `StorageStripeLog::rwlock` is NOT kept locked while reading,
/// because we read ranges of data that do not change.
class StripeLogSource final : public SourceWithProgress
{
public:
static Block getHeader(
StorageStripeLog & storage,
const StorageStripeLog & storage,
const StorageMetadataPtr & metadata_snapshot,
const Names & column_names,
IndexForNativeFormat::Blocks::const_iterator index_begin,
@ -74,19 +76,18 @@ public:
}
StripeLogSource(
StorageStripeLog & storage_,
const StorageStripeLog & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const Names & column_names,
ReadSettings read_settings_,
std::shared_ptr<const IndexForNativeFormat> & index_,
std::shared_ptr<const IndexForNativeFormat> indices_,
IndexForNativeFormat::Blocks::const_iterator index_begin_,
IndexForNativeFormat::Blocks::const_iterator index_end_)
: SourceWithProgress(
getHeader(storage_, metadata_snapshot_, column_names, index_begin_, index_end_))
: SourceWithProgress(getHeader(storage_, metadata_snapshot_, column_names, index_begin_, index_end_))
, storage(storage_)
, metadata_snapshot(metadata_snapshot_)
, read_settings(std::move(read_settings_))
, index(index_)
, indices(indices_)
, index_begin(index_begin_)
, index_end(index_end_)
{
@ -109,7 +110,7 @@ protected:
{
block_in.reset();
data_in.reset();
index.reset();
indices.reset();
}
}
@ -117,13 +118,14 @@ protected:
}
private:
StorageStripeLog & storage;
const StorageStripeLog & storage;
StorageMetadataPtr metadata_snapshot;
ReadSettings read_settings;
std::shared_ptr<const IndexForNativeFormat> index;
std::shared_ptr<const IndexForNativeFormat> indices;
IndexForNativeFormat::Blocks::const_iterator index_begin;
IndexForNativeFormat::Blocks::const_iterator index_end;
Block header;
/** optional - to create objects only on first reading
@ -141,40 +143,45 @@ private:
started = true;
String data_file_path = storage.table_path + "data.bin";
data_in.emplace(storage.disk->readFile(data_file_path, read_settings.adjustBufferSize(storage.disk->getFileSize(data_file_path))));
/// We cannot just use `storage.file_checker` to get the size of the file here,
/// because `storage.rwlock` is not locked at this point.
size_t data_file_size = storage.disk->getFileSize(data_file_path);
data_in.emplace(storage.disk->readFile(data_file_path, read_settings.adjustBufferSize(data_file_size)));
block_in.emplace(*data_in, 0, index_begin, index_end);
}
}
};
/// NOTE: The lock `StorageStripeLog::rwlock` is kept locked in exclusive mode while writing.
class StripeLogSink final : public SinkToStorage
{
public:
using WriteLock = std::unique_lock<std::shared_timed_mutex>;
explicit StripeLogSink(
StorageStripeLog & storage_, const StorageMetadataPtr & metadata_snapshot_, std::unique_lock<std::shared_timed_mutex> && lock_)
StorageStripeLog & storage_, const StorageMetadataPtr & metadata_snapshot_, WriteLock && lock_)
: SinkToStorage(metadata_snapshot_->getSampleBlock())
, storage(storage_)
, metadata_snapshot(metadata_snapshot_)
, lock(std::move(lock_))
, data_out_file(storage.table_path + "data.bin")
, data_out_compressed(storage.disk->writeFile(data_out_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append))
, data_out_compressed(storage.disk->writeFile(storage.data_file_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append))
, data_out(std::make_unique<CompressedWriteBuffer>(
*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), storage.max_compress_block_size))
, index_out_file(storage.table_path + "index.mrk")
, index_out_compressed(storage.disk->writeFile(index_out_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append))
, index_out(std::make_unique<CompressedWriteBuffer>(*index_out_compressed))
, block_out(*data_out, 0, metadata_snapshot->getSampleBlock(), false, index_out.get(), storage.disk->getFileSize(data_out_file))
*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), storage.max_compress_block_size))
{
if (!lock)
throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED);
if (storage.file_checker.empty())
{
storage.file_checker.setEmpty(storage.table_path + "data.bin");
storage.file_checker.setEmpty(storage.table_path + "index.mrk");
storage.file_checker.save();
}
/// Ensure that indices are loaded because we're going to update them.
storage.loadIndices(lock);
/// If there were no files, save zero file sizes to be able to rollback in case of error.
storage.saveFileSizes(lock);
size_t initial_data_size = storage.file_checker.getFileSize(storage.data_file_path);
block_out = std::make_unique<NativeWriter>(*data_out, 0, metadata_snapshot->getSampleBlock(), false, &storage.indices, initial_data_size);
}
String getName() const override { return "StripeLogSink"; }
@ -186,12 +193,16 @@ public:
if (!done)
{
/// Rollback partial writes.
/// No more writing.
data_out.reset();
data_out_compressed.reset();
index_out.reset();
index_out_compressed.reset();
/// Truncate files to the older sizes.
storage.file_checker.repair();
/// Remove excessive indices.
storage.removeUnsavedIndices(lock);
}
}
catch (...)
@ -202,7 +213,7 @@ public:
void consume(Chunk chunk) override
{
block_out.write(getHeader().cloneWithColumns(chunk.detachColumns()));
block_out->write(getHeader().cloneWithColumns(chunk.detachColumns()));
}
void onFinish() override
@ -213,13 +224,12 @@ public:
data_out->next();
data_out_compressed->next();
data_out_compressed->finalize();
index_out->next();
index_out_compressed->next();
index_out_compressed->finalize();
storage.file_checker.update(data_out_file);
storage.file_checker.update(index_out_file);
storage.file_checker.save();
/// Save the new indices.
storage.saveIndices(lock);
/// Save the new file sizes.
storage.saveFileSizes(lock);
done = true;
@ -232,15 +242,11 @@ public:
private:
StorageStripeLog & storage;
StorageMetadataPtr metadata_snapshot;
std::unique_lock<std::shared_timed_mutex> lock;
WriteLock lock;
String data_out_file;
std::unique_ptr<WriteBuffer> data_out_compressed;
std::unique_ptr<CompressedWriteBuffer> data_out;
String index_out_file;
std::unique_ptr<WriteBuffer> index_out_compressed;
std::unique_ptr<CompressedWriteBuffer> index_out;
NativeWriter block_out;
std::unique_ptr<NativeWriter> block_out;
bool done = false;
};
@ -258,8 +264,10 @@ StorageStripeLog::StorageStripeLog(
: IStorage(table_id_)
, disk(std::move(disk_))
, table_path(relative_path_)
, max_compress_block_size(max_compress_block_size_)
, data_file_path(table_path + "data.bin")
, index_file_path(table_path + "index.mrk")
, file_checker(disk, table_path + "sizes.json")
, max_compress_block_size(max_compress_block_size_)
, log(&Poco::Logger::get("StorageStripeLog"))
{
StorageInMemoryMetadata storage_metadata;
@ -271,6 +279,13 @@ StorageStripeLog::StorageStripeLog(
if (relative_path_.empty())
throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME);
/// Ensure the file checker is initialized.
if (file_checker.empty())
{
file_checker.setEmpty(data_file_path);
file_checker.setEmpty(index_file_path);
}
if (!attach)
{
/// create directories if they do not exist
@ -290,6 +305,9 @@ StorageStripeLog::StorageStripeLog(
}
StorageStripeLog::~StorageStripeLog() = default;
void StorageStripeLog::rename(const String & new_path_to_table_data, const StorageID & new_table_id)
{
assert(table_path != new_path_to_table_data);
@ -297,6 +315,8 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora
disk->moveDirectory(table_path, new_path_to_table_data);
table_path = new_path_to_table_data;
data_file_path = table_path + "data.bin";
index_file_path = table_path + "index.mrk";
file_checker.setPath(table_path + "sizes.json");
}
renameInMemory(new_table_id);
@ -322,41 +342,38 @@ Pipe StorageStripeLog::read(
const size_t /*max_block_size*/,
unsigned num_streams)
{
std::shared_lock lock(rwlock, getLockTimeout(context));
metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
auto lock_timeout = getLockTimeout(context);
loadIndices(lock_timeout);
ReadLock lock{rwlock, lock_timeout};
if (!lock)
throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED);
metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
NameSet column_names_set(column_names.begin(), column_names.end());
Pipes pipes;
String index_file = table_path + "index.mrk";
if (file_checker.empty() || !disk->exists(index_file))
{
if (!file_checker.getFileSize(data_file_path))
return Pipe(std::make_shared<NullSource>(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID())));
}
ReadSettings read_settings = context->getReadSettings();
auto indices_for_selected_columns
= std::make_shared<IndexForNativeFormat>(indices.extractIndexForColumns(NameSet{column_names.begin(), column_names.end()}));
CompressedReadBufferFromFile index_in(disk->readFile(index_file, read_settings.adjustBufferSize(4096)));
std::shared_ptr<const IndexForNativeFormat> index{std::make_shared<IndexForNativeFormat>(index_in, column_names_set)};
size_t size = index->blocks.size();
size_t size = indices_for_selected_columns->blocks.size();
if (num_streams > size)
num_streams = size;
ReadSettings read_settings = context->getReadSettings();
Pipes pipes;
for (size_t stream = 0; stream < num_streams; ++stream)
{
IndexForNativeFormat::Blocks::const_iterator begin = index->blocks.begin();
IndexForNativeFormat::Blocks::const_iterator end = index->blocks.begin();
IndexForNativeFormat::Blocks::const_iterator begin = indices_for_selected_columns->blocks.begin();
IndexForNativeFormat::Blocks::const_iterator end = indices_for_selected_columns->blocks.begin();
std::advance(begin, stream * size / num_streams);
std::advance(end, (stream + 1) * size / num_streams);
pipes.emplace_back(std::make_shared<StripeLogSource>(
*this, metadata_snapshot, column_names, read_settings, index, begin, end));
*this, metadata_snapshot, column_names, read_settings, indices_for_selected_columns, begin, end));
}
/// We do not keep read lock directly at the time of reading, because we read ranges of data that do not change.
@ -367,7 +384,7 @@ Pipe StorageStripeLog::read(
SinkToStoragePtr StorageStripeLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context)
{
std::unique_lock lock(rwlock, getLockTimeout(context));
WriteLock lock{rwlock, getLockTimeout(context)};
if (!lock)
throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED);
@ -377,17 +394,91 @@ SinkToStoragePtr StorageStripeLog::write(const ASTPtr & /*query*/, const Storage
CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, ContextPtr context)
{
std::shared_lock lock(rwlock, getLockTimeout(context));
ReadLock lock{rwlock, getLockTimeout(context)};
if (!lock)
throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED);
return file_checker.check();
}
void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &)
{
disk->clearDirectory(table_path);
file_checker = FileChecker{disk, table_path + "sizes.json"};
indices.clear();
file_checker.setEmpty(data_file_path);
file_checker.setEmpty(index_file_path);
indices_loaded = true;
num_indices_saved = 0;
}
void StorageStripeLog::loadIndices(std::chrono::seconds lock_timeout)
{
if (indices_loaded)
return;
/// We load indices with an exclusive lock (i.e. the write lock) because we don't want
/// a data race between two threads trying to load indices simultaneously.
WriteLock lock{rwlock, lock_timeout};
if (!lock)
throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED);
loadIndices(lock);
}
void StorageStripeLog::loadIndices(const WriteLock & /* already locked exclusively */)
{
if (indices_loaded)
return;
if (disk->exists(index_file_path))
{
CompressedReadBufferFromFile index_in(disk->readFile(index_file_path, ReadSettings{}.adjustBufferSize(4096)));
indices.read(index_in);
}
indices_loaded = true;
num_indices_saved = indices.blocks.size();
}
void StorageStripeLog::saveIndices(const WriteLock & /* already locked for writing */)
{
size_t num_indices = indices.blocks.size();
if (num_indices_saved == num_indices)
return;
size_t start = num_indices_saved;
auto index_out_compressed = disk->writeFile(index_file_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
auto index_out = std::make_unique<CompressedWriteBuffer>(*index_out_compressed);
for (size_t i = start; i != num_indices; ++i)
indices.blocks[i].write(*index_out);
index_out->next();
index_out_compressed->next();
index_out_compressed->finalize();
num_indices_saved = num_indices;
}
void StorageStripeLog::removeUnsavedIndices(const WriteLock & /* already locked for writing */)
{
if (indices.blocks.size() > num_indices_saved)
indices.blocks.resize(num_indices_saved);
}
void StorageStripeLog::saveFileSizes(const WriteLock & /* already locked for writing */)
{
file_checker.update(data_file_path);
file_checker.update(index_file_path);
file_checker.save();
}

View File

@ -7,12 +7,15 @@
#include <Core/Defines.h>
#include <Storages/IStorage.h>
#include <Formats/IndexForNativeFormat.h>
#include <Common/FileChecker.h>
#include <Common/escapeForFileName.h>
namespace DB
{
struct IndexForNativeFormat;
/** Implements a table engine that is suitable for small chunks of the log.
* In doing so, stores all the columns in a single Native file, with a nearby index.
*/
@ -23,6 +26,8 @@ class StorageStripeLog final : public shared_ptr_helper<StorageStripeLog>, publi
friend struct shared_ptr_helper<StorageStripeLog>;
public:
~StorageStripeLog() override;
String getName() const override { return "StripeLog"; }
Pipe read(
@ -57,18 +62,36 @@ protected:
size_t max_compress_block_size_);
private:
struct ColumnData
{
String data_file_path;
};
using Files = std::map<String, ColumnData>; /// file name -> column data
using ReadLock = std::shared_lock<std::shared_timed_mutex>;
using WriteLock = std::unique_lock<std::shared_timed_mutex>;
DiskPtr disk;
/// Reads the index file if it hasn't read yet.
/// It is done lazily, so that with a large number of tables, the server starts quickly.
void loadIndices(std::chrono::seconds lock_timeout);
void loadIndices(const WriteLock &);
/// Saves the index file.
void saveIndices(const WriteLock &);
/// Removes all unsaved indices.
void removeUnsavedIndices(const WriteLock &);
/// Saves the sizes of the data and index files.
void saveFileSizes(const WriteLock &);
const DiskPtr disk;
String table_path;
size_t max_compress_block_size;
String data_file_path;
String index_file_path;
FileChecker file_checker;
IndexForNativeFormat indices;
std::atomic<bool> indices_loaded = false;
size_t num_indices_saved = 0;
const size_t max_compress_block_size;
std::shared_timed_mutex rwlock;
Poco::Logger * log;