2011-10-31 17:55:06 +00:00
|
|
|
#pragma once
|
2010-03-18 19:32:14 +00:00
|
|
|
|
2015-01-25 05:07:51 +00:00
|
|
|
#include <map>
|
2017-07-28 17:34:02 +00:00
|
|
|
#include <shared_mutex>
|
2016-08-26 21:25:05 +00:00
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
#include <Disks/IDisk.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/IStorage.h>
|
|
|
|
#include <Common/FileChecker.h>
|
|
|
|
#include <Common/escapeForFileName.h>
|
2020-09-14 11:22:17 +00:00
|
|
|
#include <Core/NamesAndTypes.h>
|
2010-03-18 19:32:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2022-07-13 20:35:24 +00:00
|
|
|
|
2022-05-31 09:33:23 +00:00
|
|
|
/// Forward declaration only: IBackup is not defined in this header, so it is used here as an incomplete type.
class IBackup;
|
|
|
|
/// Shared-ownership handle to an immutable (const) IBackup; it is the type of the `backup` parameter of StorageLog::restoreDataImpl().
using BackupPtr = std::shared_ptr<const IBackup>;
|
|
|
|
|
2021-08-26 22:15:24 +00:00
|
|
|
/** Implements Log - a simple table engine without support of indices.
|
2017-04-16 15:00:33 +00:00
|
|
|
* The data is stored in a compressed form.
|
2021-08-26 22:15:24 +00:00
|
|
|
*
|
|
|
|
* Also implements TinyLog - a table engine that is suitable for small chunks of the log.
|
|
|
|
* It differs from Log in the absence of mark files.
|
2010-03-18 19:32:14 +00:00
|
|
|
*/
|
2022-07-13 20:35:24 +00:00
|
|
|
class StorageLog final : public IStorage, public WithMutableContext
|
2010-03-18 19:32:14 +00:00
|
|
|
{
|
2020-01-31 15:10:10 +00:00
|
|
|
/// LogSource and LogSink are friends, so they may access the private members declared below.
/// NOTE(review): presumably they are the reading and writing sides of this storage — confirm in the implementation file.
friend class LogSource;
|
2021-07-23 19:33:59 +00:00
|
|
|
friend class LogSink;
|
2010-03-18 19:32:14 +00:00
|
|
|
|
|
|
|
|
|
public:
|
2022-04-19 20:47:29 +00:00
|
|
|
/** Attach the table with the appropriate name, along the appropriate path (with / at the end),
|
|
|
|
* (the correctness of names and paths is not verified)
|
|
|
|
* consisting of the specified columns; Create files if they do not exist.
|
|
|
|
*/
|
|
|
|
StorageLog(
|
|
|
|
/// The engine name is a constructor argument because this one class implements both Log and TinyLog.
const String & engine_name_,
|
|
|
|
DiskPtr disk_,
|
|
|
|
const std::string & relative_path_,
|
|
|
|
const StorageID & table_id_,
|
|
|
|
const ColumnsDescription & columns_,
|
|
|
|
const ConstraintsDescription & constraints_,
|
|
|
|
const String & comment,
|
|
|
|
bool attach,
|
2022-07-13 20:35:24 +00:00
|
|
|
ContextMutablePtr context_);
|
2022-04-19 20:47:29 +00:00
|
|
|
|
2021-09-20 09:05:34 +00:00
|
|
|
/// Declared here and defined out of line (the definition is not visible in this header).
~StorageLog() override;
|
2021-08-26 22:15:24 +00:00
|
|
|
/// Returns the engine name stored in `engine_name`.
String getName() const override { return engine_name; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-08-03 13:54:14 +00:00
|
|
|
/// Override of the base-class read(); returns a Pipe. Defined out of line.
Pipe read(
|
2017-04-01 07:20:54 +00:00
|
|
|
const Names & column_names,
|
2021-07-09 03:15:41 +00:00
|
|
|
const StorageSnapshotPtr & storage_snapshot,
|
2020-09-20 17:52:17 +00:00
|
|
|
SelectQueryInfo & query_info,
|
2022-07-13 20:35:24 +00:00
|
|
|
ContextPtr local_context,
|
2018-04-19 14:47:09 +00:00
|
|
|
QueryProcessingStage::Enum processed_stage,
|
2019-02-18 23:38:44 +00:00
|
|
|
size_t max_block_size,
|
2022-10-07 10:46:45 +00:00
|
|
|
size_t num_streams) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2023-06-07 18:33:08 +00:00
|
|
|
/// Override of the base-class write(); returns the sink that receives inserted data. Defined out of line.
SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool async_insert) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-04-07 14:05:51 +00:00
|
|
|
/// Override of the base-class rename(); takes the new data path and the new table id. Defined out of line.
void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2023-08-10 11:44:16 +00:00
|
|
|
/// Override of the base-class checkData(); returns CheckResults.
/// NOTE(review): presumably implemented on top of `file_checker` — confirm in the implementation file.
CheckResults checkData(const ASTPtr & query, ContextPtr local_context) override;
|
2014-08-04 06:36:24 +00:00
|
|
|
|
2021-08-26 22:15:24 +00:00
|
|
|
/// Override of the base-class truncate(); all parameters are unnamed in this declaration.
void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override;
|
2018-04-21 00:35:20 +00:00
|
|
|
|
2020-11-01 17:38:43 +00:00
|
|
|
/// Always true for this storage.
bool storesDataOnDisk() const override { return true; }
|
2019-12-12 08:57:25 +00:00
|
|
|
/// Returns exactly one path: the result of DB::fullPath() for `table_path` on `disk`.
Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; }
|
2020-12-22 16:40:53 +00:00
|
|
|
/// Always true for this storage.
bool supportsSubcolumns() const override { return true; }
|
2021-07-12 10:06:24 +00:00
|
|
|
/// Returns sizes keyed by column name (ColumnSizeByName). Defined out of line.
ColumnSizeByName getColumnSizes() const override;
|
2018-02-21 19:26:59 +00:00
|
|
|
|
2022-05-03 17:55:45 +00:00
|
|
|
/// Row and byte totals; the std::optional return type allows "unknown".
/// NOTE(review): presumably served from the `total_rows` / `total_bytes` members below — confirm in the implementation file.
std::optional<UInt64> totalRows(const Settings & settings) const override;
|
|
|
|
std::optional<UInt64> totalBytes(const Settings & settings) const override;
|
|
|
|
|
2022-05-31 09:33:23 +00:00
|
|
|
/// Backup / restore entry points overriding the base class. Both are defined out of line.
void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & partitions) override;
|
|
|
|
void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional<ASTs> & partitions) override;
|
2021-10-26 09:48:31 +00:00
|
|
|
|
2017-11-04 03:20:18 +00:00
|
|
|
private:
|
2021-08-26 22:15:24 +00:00
|
|
|
/// Shared and exclusive lock types over std::shared_timed_mutex, which is the type of the `rwlock` member.
/// Several private methods below take `const WriteLock &`.
/// NOTE(review): presumably that parameter only proves the caller already holds the exclusive lock — confirm.
using ReadLock = std::shared_lock<std::shared_timed_mutex>;
|
|
|
|
using WriteLock = std::unique_lock<std::shared_timed_mutex>;
|
|
|
|
|
|
|
|
/// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
|
|
|
|
/// Should be called from the constructor only.
|
|
|
|
void addDataFiles(const NameAndTypePair & column);
|
|
|
|
|
|
|
|
/// Reads the marks file if it hasn't been read yet.
|
|
|
|
/// It is done lazily, so that with a large number of tables, the server starts quickly.
|
|
|
|
/// Two overloads: this one receives a lock timeout, the next one receives a WriteLock.
/// NOTE(review): presumably this overload acquires the lock itself and forwards to the other — confirm.
void loadMarks(std::chrono::seconds lock_timeout);
|
|
|
|
void loadMarks(const WriteLock &);
|
|
|
|
|
|
|
|
/// Saves the marks file.
|
|
|
|
void saveMarks(const WriteLock &);
|
|
|
|
|
|
|
|
/// Removes all unsaved marks.
|
|
|
|
void removeUnsavedMarks(const WriteLock &);
|
|
|
|
|
|
|
|
/// Saves the sizes of the data and marks files.
|
|
|
|
void saveFileSizes(const WriteLock &);
|
|
|
|
|
2022-05-03 17:55:45 +00:00
|
|
|
/// Recalculates the number of rows stored in this table.
|
|
|
|
void updateTotalRows(const WriteLock &);
|
|
|
|
|
2022-05-31 09:33:23 +00:00
|
|
|
/// Restores the data of this table from backup.
|
|
|
|
void restoreDataImpl(const BackupPtr & backup, const String & data_path_in_backup, std::chrono::seconds lock_timeout);
|
2022-05-29 19:53:56 +00:00
|
|
|
|
2017-11-26 19:22:33 +00:00
|
|
|
/** Offsets to some row number in a file for column in table.
|
|
|
|
* They are needed so that you can read the data in several threads.
|
|
|
|
*/
|
|
|
|
struct Mark
|
|
|
|
{
|
2019-12-12 08:57:25 +00:00
|
|
|
size_t rows; /// How many rows are before this offset including the block at this offset.
|
|
|
|
size_t offset; /// The offset in compressed file.
|
2021-08-26 22:15:24 +00:00
|
|
|
|
|
|
|
|
/// Serialization of a single mark to / from a buffer; the on-disk format is defined in the implementation file.
void write(WriteBuffer & out) const;
|
|
|
|
void read(ReadBuffer & in);
|
2017-11-26 19:22:33 +00:00
|
|
|
|
};
|
|
|
|
/// Sequence of marks; each DataFile carries one in its `marks` field.
using Marks = std::vector<Mark>;
|
|
|
|
|
|
|
|
/// Column data
|
2021-08-26 22:15:24 +00:00
|
|
|
|
struct DataFile
|
2017-11-26 19:22:33 +00:00
|
|
|
|
{
|
2021-08-26 22:15:24 +00:00
|
|
|
|
/// NOTE(review): presumably the position of this entry inside `data_files` — confirm.
size_t index;
|
|
|
|
String name;
|
|
|
|
String path;
|
2017-11-26 19:22:33 +00:00
|
|
|
|
/// Marks belonging to this data file.
Marks marks;
|
|
|
|
};
|
2019-12-25 08:24:13 +00:00
|
|
|
|
2021-08-26 22:15:24 +00:00
|
|
|
/// Engine name returned by getName(); const, so it is fixed at construction.
const String engine_name;
|
|
|
|
/// Disk passed to DB::fullPath() in getDataPaths(); the pointer itself is const.
const DiskPtr disk;
|
2019-12-26 14:03:32 +00:00
|
|
|
|
/// Table path used together with `disk` in getDataPaths(). Not const, unlike `disk`.
String table_path;
|
2017-11-26 19:22:33 +00:00
|
|
|
|
2021-08-26 22:15:24 +00:00
|
|
|
/// All data files of the table; see addDataFiles() for the ordering requirement.
std::vector<DataFile> data_files;
|
|
|
|
size_t num_data_files = 0;
|
|
|
|
/// Lookup by name to a raw, non-owning DataFile pointer.
/// NOTE(review): presumably the pointers refer to elements of `data_files`; if so, growing that vector
/// after the map is filled would invalidate them — confirm how addDataFiles() handles this.
std::map<String, DataFile *> data_files_by_names;
|
2014-08-04 06:36:24 +00:00
|
|
|
|
2021-08-26 22:15:24 +00:00
|
|
|
/// The Log engine uses the marks file, and the TinyLog engine doesn't.
|
|
|
|
const bool use_marks_file;
|
2014-09-30 03:08:47 +00:00
|
|
|
|
2019-12-25 08:24:13 +00:00
|
|
|
String marks_file_path;
|
2021-08-26 22:15:24 +00:00
|
|
|
|
/// Atomic flag, initially false.
/// NOTE(review): presumably set once loadMarks() has read the marks file — confirm.
std::atomic<bool> marks_loaded = false;
|
|
|
|
/// NOTE(review): presumably the count of marks already written by saveMarks(), used by removeUnsavedMarks() — confirm.
size_t num_marks_saved = 0;
|
2014-03-28 14:36:24 +00:00
|
|
|
|
2022-05-03 17:55:45 +00:00
|
|
|
/// Atomic counters, both initialised to 0.
std::atomic<UInt64> total_rows = 0;
|
|
|
|
std::atomic<UInt64> total_bytes = 0;
|
2014-03-28 14:36:24 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
FileChecker file_checker;
|
2014-08-04 06:36:24 +00:00
|
|
|
|
2021-08-26 22:15:24 +00:00
|
|
|
/// Const, so it is fixed at construction.
/// NOTE(review): presumably taken from the settings of the context passed to the constructor — confirm.
const size_t max_compress_block_size;
|
2017-11-04 03:20:18 +00:00
|
|
|
|
2021-08-26 22:15:24 +00:00
|
|
|
/// Mutex type matching the ReadLock / WriteLock aliases above; `mutable` lets const member functions lock it.
mutable std::shared_timed_mutex rwlock;
|
2010-03-18 19:32:14 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|