ClickHouse/src/Storages/MergeTree/IMergeTreeReader.h

99 lines
3.7 KiB
C++
Raw Normal View History

2013-11-26 11:55:11 +00:00
#pragma once
#include <Core/NamesAndTypes.h>
Data Skipping Indices (#4143) * made index parser * added index parsing * some fixes * added index interface and factory * fixed compilation * ptrs * added indexParts * indextypes * index condition * IndexCondition * added indexes in selectexecutor * fix * changed comment * fix * added granularity * comments * fix * fix * added writing indexes * removed indexpart class * fix * added setSkipIndexes * add rw for MergeTreeIndexes * fixes * upd error * fix * fix * reading * test index * fixed nullptr error * fixed * fix * unique names * asts -> exprlist * minmax index * fix * fixed select * fixed merging * fixed mutation * working minmax * removed test index * fixed style * added indexes to checkDataPart * added tests for minmax index * fixed constructor * fix style * fixed includes * fixed setSkipIndexes * added indexes meta to zookeeper * added parsing * removed throw * alter cmds parse * fix * added alter * fix * alters fix * fix alters * fix "after" * fixed alter * alter fix + test * fixes * upd setSkipIndexes * fixed alter bug with drop all indices * fix metadata editing * new test and repl fix * rm test files * fixed repl alter * fix * fix * indices * MTReadStream * upd test for bug * fix * added useful parsers and ast classes * fix * fix comments * replaced columns * fix * fixed parsing * fixed printing * fix err * basic IndicesDescription * go to IndicesDescr * moved indices * go to indicesDescr * fix test minmax_index* * fixed MT alter * fixed bug with replMT indices storing in zk * rename * refactoring * docs ru * docs ru * docs en * refactor * rename tests * fix docs * refactoring * fix * fix * fix * fixed style * unique idx * unique * fix * better minmax calculation * upd * added getBlock * unique_condition * added termForAST * unique * fixed not * uniqueCondition::mayBeTrueOnGranule * fix * fixed bug with double column * is always true * fix * key set * spaces * test * tests * fix * unique * fix * fix * fixed bug with duplicate column * removed unused data 
* fix * fixes * __bitSwapLastTwo * fix
2019-02-05 14:50:25 +00:00
#include <Storages/MergeTree/MergeTreeReaderStream.h>
2019-10-10 16:30:30 +00:00
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
2013-11-26 11:55:11 +00:00
namespace DB
{
2016-11-20 12:43:20 +00:00
/// Forward declaration of the data type interface. The reader class in this header
/// names IDataType::DeserializeBinaryBulkStatePtr, so the complete definition is
/// presumably provided by one of the headers included above (TODO confirm).
class IDataType;
2017-01-24 17:25:47 +00:00
/// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks.
/// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer.
/// Avoids loading the marks file if it is not needed (e.g. when reading the whole part).
2019-10-10 16:30:30 +00:00
class IMergeTreeReader : private boost::noncopyable
2013-11-26 11:55:11 +00:00
{
public:
using ValueSizeMap = std::map<std::string, double>;
using DeserializeBinaryBulkStateMap = std::map<std::string, IDataType::DeserializeBinaryBulkStatePtr>;
IMergeTreeReader(
const MergeTreeData::DataPartPtr & data_part_,
2019-10-10 16:30:30 +00:00
const NamesAndTypesList & columns_,
const StorageMetadataPtr & metadata_snapshot_,
2019-08-03 11:02:40 +00:00
UncompressedCache * uncompressed_cache_,
MarkCache * mark_cache_,
2019-10-10 16:30:30 +00:00
const MarkRanges & all_mark_ranges_,
const MergeTreeReaderSettings & settings_,
2019-10-10 16:30:30 +00:00
const ValueSizeMap & avg_value_size_hints_ = ValueSizeMap{});
2014-07-23 15:24:45 +00:00
2019-10-10 16:30:30 +00:00
/// Return the number of rows has been read or zero if there is no columns to read.
/// If continue_reading is true, continue reading from last state, otherwise seek to from_mark
2019-12-19 13:10:57 +00:00
virtual size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns) = 0;
2019-10-10 16:30:30 +00:00
virtual bool canReadIncompleteGranules() const = 0;
2019-10-10 16:30:30 +00:00
virtual ~IMergeTreeReader();
2013-11-26 11:55:11 +00:00
const ValueSizeMap & getAvgValueSizeHints() const;
2015-09-16 17:49:08 +00:00
/// Add columns from ordered_names that are not present in the block.
/// Missing columns are added in the order specified by ordered_names.
2019-09-23 19:22:02 +00:00
/// num_rows is needed in case if all res_columns are nullptr.
void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows);
/// Evaluate defaulted columns if necessary.
void evaluateMissingDefaults(Block additional_columns, Columns & res_columns);
2014-12-04 15:50:48 +00:00
2020-02-13 20:09:48 +00:00
/// If part metadata is not equal to storage metadata, than
/// try to perform conversions of columns.
2020-01-15 13:00:08 +00:00
void performRequiredConversions(Columns & res_columns);
2018-02-13 19:34:15 +00:00
const NamesAndTypesList & getColumns() const { return columns; }
2019-10-31 11:32:24 +00:00
size_t numColumnsInResult() const { return columns.size(); }
2018-02-13 19:34:15 +00:00
2019-03-25 16:55:48 +00:00
size_t getFirstMarkToRead() const
{
return all_mark_ranges.front().begin;
2019-03-25 16:55:48 +00:00
}
2013-11-26 11:55:11 +00:00
2019-11-07 11:11:38 +00:00
MergeTreeData::DataPartPtr data_part;
2019-10-10 16:30:30 +00:00
protected:
/// Returns actual column type in part, which can differ from table metadata.
NameAndTypePair getColumnFromPart(const NameAndTypePair & required_column) const;
2020-06-03 22:00:02 +00:00
void checkNumberOfColumns(size_t num_columns_to_read) const;
2020-04-14 19:47:19 +00:00
/// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size.
ValueSizeMap avg_value_size_hints;
/// Stores states for IDataType::deserializeBinaryBulk
DeserializeBinaryBulkStateMap deserialize_binary_bulk_state_map;
/// Columns that are read.
NamesAndTypesList columns;
UncompressedCache * uncompressed_cache;
MarkCache * mark_cache;
2019-10-10 16:30:30 +00:00
MergeTreeReaderSettings settings;
2015-04-16 06:12:35 +00:00
2018-10-17 03:13:00 +00:00
const MergeTreeData & storage;
StorageMetadataPtr metadata_snapshot;
MarkRanges all_mark_ranges;
2015-04-02 03:08:43 +00:00
2020-06-01 17:52:09 +00:00
using ColumnPosition = std::optional<size_t>;
ColumnPosition findColumnForOffsets(const String & column_name) const;
friend class MergeTreeRangeReader::DelayedStream;
private:
2020-04-03 11:09:27 +00:00
/// Alter conversions, which must be applied on fly if required
2020-03-25 18:44:08 +00:00
MergeTreeData::AlterConversions alter_conversions;
/// Actual data type of columns in part
std::unordered_map<String, DataTypePtr> columns_from_part;
2013-11-26 11:55:11 +00:00
};
}