// ClickHouse/dbms/src/Storages/StorageLog.h (C++ header, 173 lines, 5.4 KiB)


2011-10-31 17:55:06 +00:00
#pragma once
2010-03-18 19:32:14 +00:00
2015-01-25 05:07:51 +00:00
#include <map>
#include <ext/shared_ptr_helper.hpp>
2010-03-18 19:32:14 +00:00
#include <Poco/File.h>
#include <Poco/RWLock.h>
2010-03-18 19:32:14 +00:00
#include <Storages/IStorage.h>
#include <Common/FileChecker.h>
#include <Common/escapeForFileName.h>
2010-03-18 19:32:14 +00:00
namespace DB
{
/// Error code constants used by the storage.
/// Only declared here (`extern`, no initializer); the definitions are not part of this header.
namespace ErrorCodes
{
extern const int NO_SUCH_COLUMN_IN_TABLE;
}
2017-04-16 15:00:33 +00:00
/** Offset to a single set of values.
  * These sets have the same size in different columns.
  * They are needed so that the data can be read in several threads.
  */
struct Mark
{
    size_t rows;    /// Number of rows contained in this set and all previous ones.
    size_t offset;  /// Offset of the set in the compressed file.
};
2014-01-17 15:19:20 +00:00
/// A sequence of marks (see `Mark`).
using Marks = std::vector<Mark>;
2012-01-10 22:11:51 +00:00
2017-04-16 15:00:33 +00:00
/** Implements a storage that is suitable for logs.
  * Keys are not supported.
  * The data is stored in a compressed form.
  */
class StorageLog : private ext::shared_ptr_helper<StorageLog>, public IStorage
{
friend class ext::shared_ptr_helper<StorageLog>;
friend class LogBlockInputStream;
friend class LogBlockOutputStream;

public:
    /** Attach the table with the given name, located at the given path (with / at the end),
      *  (the correctness of names and paths is not verified)
      *  consisting of the specified columns; create files if they do not exist.
      */
    static StoragePtr create(
        const std::string & path_,
        const std::string & name_,
        NamesAndTypesListPtr columns_,
        const NamesAndTypesList & materialized_columns_,
        const NamesAndTypesList & alias_columns_,
        const ColumnDefaults & column_defaults_,
        size_t max_compress_block_size_ = DEFAULT_MAX_COMPRESS_BLOCK_SIZE);

    /// Same as above, but without materialized columns, alias columns and column defaults in the signature.
    static StoragePtr create(
        const std::string & path_,
        const std::string & name_,
        NamesAndTypesListPtr columns_,
        size_t max_compress_block_size_ = DEFAULT_MAX_COMPRESS_BLOCK_SIZE);

    /// Name of the storage type.
    std::string getName() const override { return "Log"; }

    /// Name of this particular table.
    std::string getTableName() const override { return name; }

    const NamesAndTypesList & getColumnsListImpl() const override { return *columns; }

    /// `override` already implies `virtual`, so the keyword is not repeated.
    BlockInputStreams read(
        const Names & column_names,
        ASTPtr query,
        const Context & context,
        const Settings & settings,
        QueryProcessingStage::Enum & processed_stage,
        size_t max_block_size = DEFAULT_BLOCK_SIZE,
        unsigned threads = 1) override;

    BlockOutputStreamPtr write(ASTPtr query, const Settings & settings) override;

    void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;

    /// Column data
    struct ColumnData
    {
        /// Specifies the column number in the marks file.
        /// Does not necessarily match the column number among the columns of the table:
        ///  columns with lengths of arrays are also numbered here.
        size_t column_index;
        Poco::File data_file;
        Marks marks;
    };

    /// name -> data
    using Files_t = std::map<String, ColumnData>;

    bool checkData() const override;

protected:
    String path;
    String name;
    NamesAndTypesListPtr columns;

    Poco::RWLock rwlock;

    StorageLog(
        const std::string & path_,
        const std::string & name_,
        NamesAndTypesListPtr columns_,
        const NamesAndTypesList & materialized_columns_,
        const NamesAndTypesList & alias_columns_,
        const ColumnDefaults & column_defaults_,
        size_t max_compress_block_size_);

    /// Read the marks files if they have not been read yet.
    /// It is done lazily, so that the server starts quickly even with a large number of tables.
    /// Must not be called while `rwlock` is write-locked.
    void loadMarks();

    /// Can be called with any state of `rwlock`.
    size_t marksCount();

    /// Overload of `read` that additionally takes explicit mark positions.
    /// NOTE(review): the exact meaning of from_mark / to_mark / from_null_mark
    ///  (e.g. whether the range is half-open) is defined in the implementation file - confirm there.
    BlockInputStreams read(
        size_t from_mark,
        size_t to_mark,
        size_t from_null_mark,
        const Names & column_names,
        ASTPtr query,
        const Context & context,
        const Settings & settings,
        QueryProcessingStage::Enum & processed_stage,
        size_t max_block_size = DEFAULT_BLOCK_SIZE,
        unsigned threads = 1);

private:
    Files_t files;              /// name -> data
    Names column_names;         /// column_index -> name
    Names null_map_filenames;

    Poco::File marks_file;
    Poco::File null_marks_file;

    void loadMarksImpl(bool load_null_marks);

    /// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
    void addFile(const String & column_name, const IDataType & type, size_t level = 0);

    /// Given a default like the neighbouring members, so the flag never holds an indeterminate value;
    ///  a constructor mem-initializer, if present, still takes precedence.
    /// NOTE(review): presumably records whether loadMarks() has already read the marks - confirm in the .cpp.
    bool loaded_marks = false;
    bool has_nullable_columns = false;

    size_t max_compress_block_size;
    size_t file_count = 0;
    size_t null_file_count = 0;

protected:
    FileChecker file_checker;

private:
    /** For normal columns, the number of rows in the block is specified in the marks.
      * For array columns and nested structures, there is more than one group of marks, corresponding to different files:
      *  - for the insides (file name.bin) - the total number of array elements in the block is specified,
      *  - for the array sizes (file name.size0.bin) - the number of rows (the whole arrays themselves) in the block is specified.
      *
      * Returns the first group of marks that contains the number of rows, but not the internals of the arrays.
      */
    const Marks & getMarksWithRealRowCount() const;

    /// Directory of the table: `path` + escaped table name + trailing '/'.
    std::string getFullPath() const { return path + escapeForFileName(name) + '/'; }
};
}