// ClickHouse/dbms/src/Processors/Formats/IRowInputFormat.h

#pragma once
#include <functional>
#include <string>
#include <utility>
#include <vector>

#include <Columns/IColumn.h>
#include <Processors/Formats/IInputFormat.h>
namespace DB
{
2019-02-19 18:41:18 +00:00
/// Contains extra information about read data.
struct RowReadExtension
{
/// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise.
/// It's used to attach defaults for partially filled rows.
std::vector<UInt8> read_columns;
};
/// Common parameters for generating blocks.
///
/// All scalar members are zero-initialized so that a default-constructed
/// object never carries indeterminate values. The struct remains an aggregate,
/// so existing brace-initialization of all members keeps working.
struct RowInputFormatParams
{
    /// NOTE(review): presumably the maximum number of rows per generated chunk - confirm against generate().
    size_t max_block_size = 0;

    /// NOTE(review): presumably the absolute number of tolerated parse errors - confirm against generate().
    UInt64 allow_errors_num = 0;
    /// NOTE(review): presumably the tolerated share of erroneous rows - confirm against generate().
    Float64 allow_errors_ratio = 0;

    /// NOTE(review): usage is not visible in this header - confirm against generate().
    UInt64 rows_portion_size = 0;

    /// Callback taking no arguments and returning nothing; empty by default.
    using ReadCallback = std::function<void()>;
    ReadCallback callback;
};
///Row oriented input format: reads data row by row.
class IRowInputFormat : public IInputFormat
{
public:
2019-02-19 18:41:18 +00:00
using Params = RowInputFormatParams;
IRowInputFormat(
Block header,
2019-04-05 14:28:27 +00:00
ReadBuffer & in_,
Params params)
2019-04-05 14:28:27 +00:00
: IInputFormat(std::move(header), in_), params(params)
{
}
Chunk generate() override;
protected:
/** Read next row and append it to the columns.
* If no more rows - return false.
*/
2019-02-19 18:41:18 +00:00
virtual bool readRow(MutableColumns & columns, RowReadExtension & extra) = 0;
virtual void readPrefix() {} /// delimiter before begin of result
virtual void readSuffix() {} /// delimiter after end of result
/// Skip data until next row.
/// This is intended for text streams, that allow skipping of errors.
/// By default - throws not implemented exception.
virtual bool allowSyncAfterError() const { return false; }
virtual void syncAfterError();
/// In case of parse error, try to roll back and parse last one or two rows very carefully
/// and collect as much as possible diagnostic information about error.
/// If not implemented, returns empty string.
virtual std::string getDiagnosticInfo() { return {}; }
private:
Params params;
size_t total_rows = 0;
size_t num_errors = 0;
};
}