// ClickHouse/src/Storages/StorageLog.h
#pragma once

#include <atomic>
#include <chrono>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <shared_mutex>
#include <string>
#include <vector>

#include <base/shared_ptr_helper.h>
#include <Common/FileChecker.h>
#include <Common/escapeForFileName.h>
#include <Core/NamesAndTypes.h>
#include <Disks/IDisk.h>
#include <Storages/IStorage.h>

namespace DB
{
/** Implements Log - a simple table engine without support of indices.
2017-04-16 15:00:33 +00:00
* The data is stored in a compressed form.
*
* Also implements TinyLog - a table engine that is suitable for small chunks of the log.
* It differs from Log in the absence of mark files.
2010-03-18 19:32:14 +00:00
*/
2021-06-15 19:55:21 +00:00
class StorageLog final : public shared_ptr_helper<StorageLog>, public IStorage
2010-03-18 19:32:14 +00:00
{
friend class LogSource;
2021-07-23 19:33:59 +00:00
friend class LogSink;
2022-01-17 18:55:40 +00:00
friend class LogRestoreTask;
2021-06-15 19:55:21 +00:00
friend struct shared_ptr_helper<StorageLog>;
2010-03-18 19:32:14 +00:00
public:
2021-09-20 09:05:34 +00:00
~StorageLog() override;
String getName() const override { return engine_name; }
2020-08-03 13:54:14 +00:00
Pipe read(
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
2017-06-02 15:54:39 +00:00
unsigned num_streams) override;
2021-07-23 19:33:59 +00:00
SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override;
2020-04-07 14:05:51 +00:00
void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override;
CheckResults checkData(const ASTPtr & /* query */, ContextPtr /* context */) override;
void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override;
2018-04-21 00:35:20 +00:00
2020-11-01 17:38:43 +00:00
bool storesDataOnDisk() const override { return true; }
Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; }
2020-12-22 16:40:53 +00:00
bool supportsSubcolumns() const override { return true; }
2021-07-12 10:06:24 +00:00
ColumnSizeByName getColumnSizes() const override;
bool hasDataToBackup() const override { return true; }
BackupEntries backupData(ContextPtr context, const ASTs & partitions) override;
2022-04-19 18:15:27 +00:00
RestoreTaskPtr restoreData(ContextMutablePtr context, const ASTs & partitions, const BackupPtr & backup, const String & data_path_in_backup, const StorageRestoreSettings & restore_settings, const std::shared_ptr<IRestoreCoordination> & restore_coordination) override;
std::optional<UInt64> totalRows(const Settings & settings) const override;
std::optional<UInt64> totalBytes(const Settings & settings) const override;
protected:
/** Attach the table with the appropriate name, along the appropriate path (with / at the end),
* (the correctness of names and paths is not verified)
* consisting of the specified columns; Create files if they do not exist.
*/
StorageLog(
const String & engine_name_,
DiskPtr disk_,
2019-10-25 19:07:47 +00:00
const std::string & relative_path_,
2019-12-04 16:06:55 +00:00
const StorageID & table_id_,
const ColumnsDescription & columns_,
2019-08-24 21:20:20 +00:00
const ConstraintsDescription & constraints_,
2021-04-23 12:18:23 +00:00
const String & comment,
bool attach,
size_t max_compress_block_size_);
private:
using ReadLock = std::shared_lock<std::shared_timed_mutex>;
using WriteLock = std::unique_lock<std::shared_timed_mutex>;
/// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
/// Should be called from the constructor only.
void addDataFiles(const NameAndTypePair & column);
/// Reads the marks file if it hasn't read yet.
/// It is done lazily, so that with a large number of tables, the server starts quickly.
void loadMarks(std::chrono::seconds lock_timeout) const;
void loadMarks(const WriteLock &) const;
/// Saves the marks file.
void saveMarks(const WriteLock &);
/// Removes all unsaved marks.
void removeUnsavedMarks(const WriteLock &);
/// Saves the sizes of the data and marks files.
void saveFileSizes(const WriteLock &);
2017-11-26 19:22:33 +00:00
/** Offsets to some row number in a file for column in table.
* They are needed so that you can read the data in several threads.
*/
struct Mark
{
size_t rows; /// How many rows are before this offset including the block at this offset.
size_t offset; /// The offset in compressed file.
void write(WriteBuffer & out) const;
void read(ReadBuffer & in);
2017-11-26 19:22:33 +00:00
};
using Marks = std::vector<Mark>;
/// Column data
struct DataFile
2017-11-26 19:22:33 +00:00
{
size_t index;
String name;
String path;
mutable Marks marks;
2017-11-26 19:22:33 +00:00
};
const String engine_name;
const DiskPtr disk;
String table_path;
2017-11-26 19:22:33 +00:00
std::vector<DataFile> data_files;
size_t num_data_files = 0;
std::map<String, DataFile *> data_files_by_names;
/// The Log engine uses the marks file, and the TinyLog engine doesn't.
const bool use_marks_file;
String marks_file_path;
mutable std::atomic<bool> marks_loaded = false;
mutable size_t num_marks_saved = 0;
2014-03-28 14:36:24 +00:00
FileChecker file_checker;
const size_t max_compress_block_size;
mutable std::shared_timed_mutex rwlock;
2010-03-18 19:32:14 +00:00
};
}