2013-11-26 11:55:11 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/NamesAndTypes.h>
|
2019-02-05 14:50:25 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeReaderStream.h>
|
2018-06-19 18:09:09 +00:00
|
|
|
#include <port/clock.h>
|
2013-11-26 11:55:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-11-20 12:43:20 +00:00
|
|
|
class IDataType;
|
2017-01-24 17:25:47 +00:00
|
|
|
|
|
|
|
/// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks.
|
|
|
|
/// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer.
|
|
|
|
/// Avoids loading the marks file if it is not needed (e.g. when reading the whole part).
|
2016-12-10 04:51:36 +00:00
|
|
|
class MergeTreeReader : private boost::noncopyable
|
2013-11-26 11:55:11 +00:00
|
|
|
{
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
using ValueSizeMap = std::map<std::string, double>;
|
2018-05-21 16:21:15 +00:00
|
|
|
using DeserializeBinaryBulkStateMap = std::map<std::string, IDataType::DeserializeBinaryBulkStatePtr>;
|
2015-12-13 04:52:13 +00:00
|
|
|
|
2019-09-23 19:22:02 +00:00
|
|
|
MergeTreeReader(String path_, /// Path to the directory containing the part
|
|
|
|
MergeTreeData::DataPartPtr data_part_,
|
|
|
|
NamesAndTypesList columns_,
|
2019-08-03 11:02:40 +00:00
|
|
|
UncompressedCache * uncompressed_cache_,
|
|
|
|
MarkCache * mark_cache_,
|
|
|
|
bool save_marks_in_cache_,
|
2019-09-23 19:22:02 +00:00
|
|
|
const MergeTreeData & storage_,
|
|
|
|
MarkRanges all_mark_ranges_,
|
|
|
|
size_t aio_threshold_,
|
2020-01-04 05:46:50 +00:00
|
|
|
size_t mmap_threshold_,
|
2019-09-23 19:22:02 +00:00
|
|
|
size_t max_read_buffer_size_,
|
|
|
|
ValueSizeMap avg_value_size_hints_ = ValueSizeMap{},
|
2019-08-03 11:02:40 +00:00
|
|
|
const ReadBufferFromFileBase::ProfileCallback & profile_callback_ = ReadBufferFromFileBase::ProfileCallback{},
|
|
|
|
clockid_t clock_type_ = CLOCK_MONOTONIC_COARSE);
|
2014-07-23 15:24:45 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
~MergeTreeReader();
|
2013-11-26 11:55:11 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
const ValueSizeMap & getAvgValueSizeHints() const;
|
2015-09-16 17:49:08 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Add columns from ordered_names that are not present in the block.
|
|
|
|
/// Missing columns are added in the order specified by ordered_names.
|
2019-09-23 19:22:02 +00:00
|
|
|
/// num_rows is needed in case if all res_columns are nullptr.
|
|
|
|
void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows);
|
2018-02-22 12:43:57 +00:00
|
|
|
/// Evaluate defaulted columns if necessary.
|
2019-10-02 11:57:17 +00:00
|
|
|
void evaluateMissingDefaults(Block additional_columns, Columns & res_columns);
|
2014-12-04 15:50:48 +00:00
|
|
|
|
2020-01-15 13:00:08 +00:00
|
|
|
/// Perform conversions TODO(alesap)
|
|
|
|
void performRequiredConversions(Columns & res_columns);
|
|
|
|
|
2018-02-13 19:34:15 +00:00
|
|
|
const NamesAndTypesList & getColumns() const { return columns; }
|
2019-10-31 11:32:24 +00:00
|
|
|
size_t numColumnsInResult() const { return columns.size(); }
|
2018-02-13 19:34:15 +00:00
|
|
|
|
2018-11-28 15:05:28 +00:00
|
|
|
/// Return the number of rows has been read or zero if there is no columns to read.
|
2019-09-23 19:22:02 +00:00
|
|
|
/// If continue_reading is true, continue reading from last state, otherwise seek to from_mark.
|
|
|
|
/// Fills res_columns in order specified in getColumns() list. If column was not read it will be nullptr.
|
|
|
|
size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns);
|
2018-11-28 15:05:28 +00:00
|
|
|
|
2018-11-15 14:06:54 +00:00
|
|
|
MergeTreeData::DataPartPtr data_part;
|
2019-03-25 16:55:48 +00:00
|
|
|
|
|
|
|
size_t getFirstMarkToRead() const
|
|
|
|
{
|
|
|
|
return all_mark_ranges.back().begin;
|
|
|
|
}
|
2016-07-19 10:57:57 +00:00
|
|
|
private:
|
2019-02-05 14:50:25 +00:00
|
|
|
using FileStreams = std::map<std::string, std::unique_ptr<MergeTreeReaderStream>>;
|
2013-11-26 11:55:11 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size.
|
|
|
|
ValueSizeMap avg_value_size_hints;
|
2018-05-21 16:21:15 +00:00
|
|
|
/// Stores states for IDataType::deserializeBinaryBulk
|
|
|
|
DeserializeBinaryBulkStateMap deserialize_binary_bulk_state_map;
|
2018-10-03 17:10:23 +00:00
|
|
|
/// Path to the directory containing the part
|
2017-04-01 07:20:54 +00:00
|
|
|
String path;
|
2017-01-24 20:44:12 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
FileStreams streams;
|
2015-04-09 00:37:08 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Columns that are read.
|
2017-12-25 21:57:29 +00:00
|
|
|
NamesAndTypesList columns;
|
2015-04-09 00:37:08 +00:00
|
|
|
|
2020-01-15 13:00:08 +00:00
|
|
|
std::unordered_map<String, DataTypePtr> columns_from_part;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
UncompressedCache * uncompressed_cache;
|
|
|
|
MarkCache * mark_cache;
|
|
|
|
/// If save_marks_in_cache is false, then, if marks are not in cache, we will load them but won't save in the cache, to avoid evicting other data.
|
|
|
|
bool save_marks_in_cache;
|
2015-04-16 06:12:35 +00:00
|
|
|
|
2018-10-17 03:13:00 +00:00
|
|
|
const MergeTreeData & storage;
|
2017-04-01 07:20:54 +00:00
|
|
|
MarkRanges all_mark_ranges;
|
|
|
|
size_t aio_threshold;
|
2020-01-04 05:46:50 +00:00
|
|
|
size_t mmap_threshold;
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t max_read_buffer_size;
|
2013-11-26 11:55:11 +00:00
|
|
|
|
2019-01-04 12:10:00 +00:00
|
|
|
void addStreams(const String & name, const IDataType & type,
|
2017-08-07 07:31:16 +00:00
|
|
|
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type);
|
2015-04-09 00:37:08 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void readData(
|
|
|
|
const String & name, const IDataType & type, IColumn & column,
|
2017-07-11 09:32:39 +00:00
|
|
|
size_t from_mark, bool continue_reading, size_t max_rows_to_read,
|
2017-08-07 07:31:16 +00:00
|
|
|
bool read_offsets = true);
|
2015-04-02 03:08:43 +00:00
|
|
|
|
2017-06-14 10:50:22 +00:00
|
|
|
|
2018-02-20 11:45:58 +00:00
|
|
|
friend class MergeTreeRangeReader::DelayedStream;
|
2013-11-26 11:55:11 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|