mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-11 08:52:06 +00:00
465962df7f
* support orc filter push down * update orc lib version * replace setqueryinfo with setkeycondition * fix issue https://github.com/ClickHouse/ClickHouse/issues/53536 * refactor source with key condition * fix building error * remove std::cout * update orc * update orc version * fix bugs * improve code * upgrade orc lib * fix code style * change as requested * add performance tests for orc filter push down * add performance tests for orc filter push down * fix all bugs * fix default as null issue * add uts for null as default issues * upgrade orc lib * fix failed orc lib uts and fix typo * fix failed uts * fix failed uts * fix ast fuzzer tests * fix bug of uint64 overflow in https://s3.amazonaws.com/clickhouse-test-reports/55330/de22fdcaea2e12c96f300e95f59beba84401712d/fuzzer_astfuzzerubsan/report.html * fix asan fatal caused by reused column vector batch in native orc input format. refer to https://s3.amazonaws.com/clickhouse-test-reports/55330/be39d23af2d7e27f5ec7f168947cf75aeaabf674/stateless_tests__asan__[4_4].htm * fix wrong performance tests * disable 02892_orc_filter_pushdown on aarch64. https://s3.amazonaws.com/clickhouse-test-reports/55330/be39d23af2d7e27f5ec7f168947cf75aeaabf674/stateless_tests__aarch64_.html * add some comments * add some comments * inline range::equals and range::less * fix data race of key condition * trigger ci
82 lines
2.6 KiB
C++
82 lines
2.6 KiB
C++
#pragma once
|
|
|
|
#include <Formats/ColumnMapping.h>
|
|
#include <IO/ReadBuffer.h>
|
|
#include <Interpreters/Context.h>
|
|
#include <Processors/Formats/InputFormatErrorsLogger.h>
|
|
#include <Processors/SourceWithKeyCondition.h>
|
|
#include <Storages/MergeTree/KeyCondition.h>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
/// Forward declaration only; none of the declarations visible in this header use it by name.
struct SelectQueryInfo;
|
|
|
|
/// Shared pointer to a ColumnMapping; this is the type IInputFormat stores and hands out.
using ColumnMappingPtr = std::shared_ptr<ColumnMapping>;
|
|
|
|
/** Input format is a source, that reads data from ReadBuffer.
|
|
*/
|
|
class IInputFormat : public SourceWithKeyCondition
|
|
{
|
|
protected:
|
|
|
|
ReadBuffer * in [[maybe_unused]] = nullptr;
|
|
|
|
public:
|
|
/// ReadBuffer can be nullptr for random-access formats.
|
|
IInputFormat(Block header, ReadBuffer * in_);
|
|
|
|
/** In some usecase (hello Kafka) we need to read a lot of tiny streams in exactly the same format.
|
|
* The recreating of parser for each small stream takes too long, so we introduce a method
|
|
* resetParser() which allow to reset the state of parser to continue reading of
|
|
* source stream without recreating that.
|
|
* That should be called after current buffer was fully read.
|
|
*/
|
|
virtual void resetParser();
|
|
|
|
virtual void setReadBuffer(ReadBuffer & in_);
|
|
ReadBuffer & getReadBuffer() const { chassert(in); return *in; }
|
|
|
|
virtual const BlockMissingValues & getMissingValues() const
|
|
{
|
|
static const BlockMissingValues none;
|
|
return none;
|
|
}
|
|
|
|
/// Must be called from ParallelParsingInputFormat after readSuffix
|
|
ColumnMappingPtr getColumnMapping() const { return column_mapping; }
|
|
/// Must be called from ParallelParsingInputFormat before readPrefix
|
|
void setColumnMapping(ColumnMappingPtr column_mapping_) { column_mapping = column_mapping_; }
|
|
|
|
size_t getCurrentUnitNumber() const { return current_unit_number; }
|
|
void setCurrentUnitNumber(size_t current_unit_number_) { current_unit_number = current_unit_number_; }
|
|
|
|
void addBuffer(std::unique_ptr<ReadBuffer> buffer) { owned_buffers.emplace_back(std::move(buffer)); }
|
|
|
|
void setErrorsLogger(const InputFormatErrorsLoggerPtr & errors_logger_) { errors_logger = errors_logger_; }
|
|
|
|
virtual size_t getApproxBytesReadForChunk() const { return 0; }
|
|
|
|
void needOnlyCount() { need_only_count = true; }
|
|
|
|
protected:
|
|
virtual Chunk getChunkForCount(size_t rows);
|
|
|
|
ColumnMappingPtr column_mapping{};
|
|
|
|
InputFormatErrorsLoggerPtr errors_logger;
|
|
|
|
bool need_only_count = false;
|
|
|
|
private:
|
|
/// Number of currently parsed chunk (if parallel parsing is enabled)
|
|
size_t current_unit_number = 0;
|
|
|
|
std::vector<std::unique_ptr<ReadBuffer>> owned_buffers;
|
|
};
|
|
|
|
/// Shared pointer to an IInputFormat.
using InputFormatPtr = std::shared_ptr<IInputFormat>;
|
|
|
|
}
|