// ClickHouse/src/Processors/Formats/IRowInputFormat.h

#pragma once
#include <string>
#include <Columns/IColumn.h>
#include <Processors/Formats/IInputFormat.h>
#include <QueryPipeline/SizeLimits.h>
#include <Poco/Timespan.h>
/// Forward declaration: this header only refers to Stopwatch by const reference (see checkTimeLimit),
/// so the full definition is not needed here.
class Stopwatch;
namespace DB
{
/// Contains extra information about read data.
struct RowReadExtension
{
2021-04-13 20:04:13 +00:00
/// IRowInputFormat::read output. It contains non zero for columns that actually read from the source and zero otherwise.
2019-02-19 18:41:18 +00:00
/// It's used to attach defaults for partially filled rows.
std::vector<UInt8> read_columns;
};
/// Common parameters for generating blocks.
struct RowInputFormatParams
{
2021-05-08 15:35:09 +00:00
size_t max_block_size = 0;
2019-02-19 18:41:18 +00:00
2021-05-08 15:35:09 +00:00
UInt64 allow_errors_num = 0;
Float64 allow_errors_ratio = 0;
Poco::Timespan max_execution_time = 0;
OverflowMode timeout_overflow_mode = OverflowMode::THROW;
2019-02-19 18:41:18 +00:00
};
/// Tells whether `code` denotes a parse error.
/// NOTE(review): `code` is presumably a DB::ErrorCodes value; the definition is not in this header - confirm.
bool isParseError(int code);
/// Checks the time limit described by `params` using the time measured by `stopwatch`.
/// NOTE(review): presumably compares the elapsed time with params.max_execution_time and reacts
/// according to params.timeout_overflow_mode; the meaning of the returned flag is not visible here - confirm.
bool checkTimeLimit(const RowInputFormatParams & params, const Stopwatch & stopwatch);
/// Row oriented input format: reads data row by row.
class IRowInputFormat : public IInputFormat
{
public:
2019-02-19 18:41:18 +00:00
using Params = RowInputFormatParams;
2021-03-09 14:46:52 +00:00
IRowInputFormat(Block header, ReadBuffer & in_, Params params_);
Chunk generate() override;
void resetParser() override;
protected:
/** Read next row and append it to the columns.
* If no more rows - return false.
*/
2019-02-19 18:41:18 +00:00
virtual bool readRow(MutableColumns & columns, RowReadExtension & extra) = 0;
/// Count some rows. Called in a loop until it returns 0, and the return values are added up.
/// `max_block_size` is the recommended number of rows after which to stop, if the implementation
/// involves scanning the data. If the implementation just takes the count from metadata,
/// `max_block_size` can be ignored.
virtual size_t countRows(size_t max_block_size);
virtual bool supportsCountRows() const { return false; }
virtual void readPrefix() {} /// delimiter before begin of result
virtual void readSuffix() {} /// delimiter after end of result
/// Skip data until next row.
/// This is intended for text streams, that allow skipping of errors.
/// By default - throws not implemented exception.
virtual bool allowSyncAfterError() const { return false; }
virtual void syncAfterError();
/// In case of parse error, try to roll back and parse last one or two rows very carefully
/// and collect as much as possible diagnostic information about error.
/// If not implemented, returns empty string.
virtual std::string getDiagnosticInfo() { return {}; }
/// Get diagnostic info and raw data for a row
2022-09-08 16:37:18 +00:00
virtual std::pair<std::string, std::string> getDiagnosticAndRawData() { return std::make_pair("", ""); }
void logError();
2019-07-30 18:48:40 +00:00
const BlockMissingValues & getMissingValues() const override { return block_missing_values; }
size_t getTotalRows() const { return total_rows; }
size_t getApproxBytesReadForChunk() const override { return approx_bytes_read_for_chunk; }
2021-03-09 14:46:52 +00:00
Serializations serializations;
private:
Params params;
size_t total_rows = 0;
size_t num_errors = 0;
2019-07-30 18:48:40 +00:00
BlockMissingValues block_missing_values;
2023-07-06 13:48:57 +00:00
size_t approx_bytes_read_for_chunk = 0;
};
}