2011-10-31 17:55:06 +00:00
|
|
|
#pragma once
|
2010-03-18 19:32:14 +00:00
|
|
|
|
2015-01-25 05:07:51 +00:00
|
|
|
#include <map>
|
2017-07-28 17:34:02 +00:00
|
|
|
#include <shared_mutex>
|
2017-06-06 17:18:32 +00:00
|
|
|
#include <ext/shared_ptr_helper.h>
|
2016-08-26 21:25:05 +00:00
|
|
|
|
2019-12-12 08:57:25 +00:00
|
|
|
#include <Disks/IDisk.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/IStorage.h>
|
|
|
|
#include <Common/FileChecker.h>
|
|
|
|
#include <Common/escapeForFileName.h>
|
2010-03-18 19:32:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2017-08-07 07:31:16 +00:00
|
|
|
/** Implements a simple table engine without support of indices.
  * The data is stored in a compressed form.
  */
|
2020-03-19 23:48:53 +00:00
|
|
|
class StorageLog final : public ext::shared_ptr_helper<StorageLog>, public IStorage
{
    /// Granted access to the non-public members declared below.
    friend class LogSource;
    friend class LogBlockOutputStream;
    /// NOTE(review): presumably required so the helper can invoke the protected constructor - confirm.
    friend struct ext::shared_ptr_helper<StorageLog>;

public:
    /// Name of the table engine.
    String getName() const override { return "Log"; }

    /// The member functions below are marked `override`; they are only declared here.

    Pipe read(
        const Names & column_names,
        const StorageMetadataPtr & metadata_snapshot,
        const SelectQueryInfo & query_info,
        const Context & context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;

    void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override;

    CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override;

    void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override;

    /// Returns a single-element list: the result of DB::fullPath(disk, table_path).
    Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; }

protected:
    /** Attach the table with the appropriate name, along the appropriate path (with / at the end),
      *  (the correctness of names and paths is not verified)
      *  consisting of the specified columns; Create files if they do not exist.
      */
    StorageLog(
        DiskPtr disk_,
        const std::string & relative_path_,
        const StorageID & table_id_,
        const ColumnsDescription & columns_,
        const ConstraintsDescription & constraints_,
        bool attach,
        size_t max_compress_block_size_);

private:
    /** Offsets to some row number in a file for column in table.
      * They are needed so that you can read the data in several threads.
      *
      * Both fields are zero-initialized so that a default-constructed Mark never
      * carries indeterminate values.
      */
    struct Mark
    {
        size_t rows = 0;    /// How many rows are before this offset including the block at this offset.
        size_t offset = 0;  /// The offset in compressed file.
    };

    using Marks = std::vector<Mark>;

    /// Column data
    struct ColumnData
    {
        /// Specifies the column number in the marks file.
        /// Does not necessarily match the column number among the columns of the table: columns with lengths of arrays are also numbered here.
        /// Zero-initialized so that a default-constructed ColumnData is fully defined.
        size_t column_index = 0;

        String data_file_path;
        Marks marks;
    };

    using Files = std::map<String, ColumnData>; /// file name -> column data

    DiskPtr disk;
    String table_path;

    /// `mutable` so that it can also be locked from const member functions.
    mutable std::shared_mutex rwlock;

    Files files;

    Names column_names_by_idx; /// column_index -> name

    String marks_file_path;

    /// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
    void addFiles(const String & column_name, const IDataType & type);

    /// Starts as false; see loadMarks() below, which reads the marks lazily.
    bool loaded_marks = false;

    size_t max_compress_block_size;
    size_t file_count = 0;

    FileChecker file_checker;

    /// Read marks files if they are not already read.
    /// It is done lazily, so that with a large number of tables, the server starts quickly.
    /// You can not call with a write locked `rwlock`.
    void loadMarks();

    /** For normal columns, the number of rows in the block is specified in the marks.
      * For array columns and nested structures, there are more than one group of marks that correspond to different files
      *  - for elements (file name.bin) - the total number of array elements in the block is specified,
      *  - for array sizes (file name.size0.bin) - the number of rows (the whole arrays themselves) in the block is specified.
      *
      * Return the first group of marks that contain the number of rows, but not the internals of the arrays.
      */
    const Marks & getMarksWithRealRowCount(const StorageMetadataPtr & metadata_snapshot) const;
};
|
|
|
|
|
|
|
|
}
|