2018-05-24 01:02:16 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <Columns/IColumn.h>
|
|
|
|
#include <Processors/Formats/IInputFormat.h>
|
2019-08-01 14:25:41 +00:00
|
|
|
#include <DataStreams/SizeLimits.h>
|
|
|
|
#include <Poco/Timespan.h>
|
|
|
|
#include <Common/Stopwatch.h>
|
2018-05-24 01:02:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-02-19 18:41:18 +00:00
|
|
|
/// Contains extra information about read data.
|
|
|
|
struct RowReadExtension
|
|
|
|
{
|
|
|
|
/// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise.
|
|
|
|
/// It's used to attach defaults for partially filled rows.
|
|
|
|
std::vector<UInt8> read_columns;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Common parameters for generating blocks.
|
|
|
|
struct RowInputFormatParams
|
|
|
|
{
|
|
|
|
size_t max_block_size;
|
|
|
|
|
|
|
|
UInt64 allow_errors_num;
|
|
|
|
Float64 allow_errors_ratio;
|
2019-07-24 18:00:09 +00:00
|
|
|
|
2019-08-01 14:25:41 +00:00
|
|
|
Poco::Timespan max_execution_time = 0;
|
|
|
|
OverflowMode timeout_overflow_mode = OverflowMode::THROW;
|
2019-02-19 18:41:18 +00:00
|
|
|
};
|
|
|
|
|
2019-09-02 12:57:22 +00:00
|
|
|
/// Presumably tells whether the error code `code` denotes a parse error.
/// NOTE(review): the definition is not in this header - confirm which codes qualify.
bool isParseError(int code);
|
2019-09-02 16:26:22 +00:00
|
|
|
/// Takes the format parameters and a stopwatch; presumably compares the elapsed time with
/// params.max_execution_time and reacts according to params.timeout_overflow_mode.
/// NOTE(review): the definition is not in this header - confirm the meaning of the returned bool.
bool checkTimeLimit(const RowInputFormatParams & params, const Stopwatch & stopwatch);
|
2019-09-02 12:57:22 +00:00
|
|
|
|
2020-11-03 20:32:18 +00:00
|
|
|
/// Row oriented input format: reads data row by row.
|
2018-05-24 01:02:16 +00:00
|
|
|
class IRowInputFormat : public IInputFormat
|
|
|
|
{
|
|
|
|
public:
|
2019-02-19 18:41:18 +00:00
|
|
|
using Params = RowInputFormatParams;
|
2018-05-24 01:02:16 +00:00
|
|
|
|
|
|
|
IRowInputFormat(
|
|
|
|
Block header,
|
2019-04-05 14:28:27 +00:00
|
|
|
ReadBuffer & in_,
|
2019-08-03 11:02:40 +00:00
|
|
|
Params params_)
|
|
|
|
: IInputFormat(std::move(header), in_), params(params_)
|
2018-05-24 01:02:16 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2019-02-18 16:36:07 +00:00
|
|
|
Chunk generate() override;
|
2018-05-24 01:02:16 +00:00
|
|
|
|
2019-11-26 23:46:19 +00:00
|
|
|
void resetParser() override;
|
|
|
|
|
2018-05-24 01:02:16 +00:00
|
|
|
protected:
|
|
|
|
/** Read next row and append it to the columns.
|
|
|
|
* If no more rows - return false.
|
|
|
|
*/
|
2019-02-19 18:41:18 +00:00
|
|
|
virtual bool readRow(MutableColumns & columns, RowReadExtension & extra) = 0;
|
2018-05-24 01:02:16 +00:00
|
|
|
|
2019-04-05 11:39:07 +00:00
|
|
|
virtual void readPrefix() {} /// delimiter before begin of result
|
|
|
|
virtual void readSuffix() {} /// delimiter after end of result
|
2018-05-24 01:02:16 +00:00
|
|
|
|
|
|
|
/// Skip data until next row.
|
|
|
|
/// This is intended for text streams, that allow skipping of errors.
|
|
|
|
/// By default - throws not implemented exception.
|
|
|
|
virtual bool allowSyncAfterError() const { return false; }
|
|
|
|
virtual void syncAfterError();
|
|
|
|
|
|
|
|
/// In case of parse error, try to roll back and parse last one or two rows very carefully
|
|
|
|
/// and collect as much as possible diagnostic information about error.
|
|
|
|
/// If not implemented, returns empty string.
|
2019-04-05 11:39:07 +00:00
|
|
|
virtual std::string getDiagnosticInfo() { return {}; }
|
2018-05-24 01:02:16 +00:00
|
|
|
|
2019-07-30 18:48:40 +00:00
|
|
|
const BlockMissingValues & getMissingValues() const override { return block_missing_values; }
|
|
|
|
|
2020-02-07 09:58:29 +00:00
|
|
|
size_t getTotalRows() const { return total_rows; }
|
|
|
|
|
2018-05-24 01:02:16 +00:00
|
|
|
private:
|
|
|
|
Params params;
|
|
|
|
|
|
|
|
size_t total_rows = 0;
|
|
|
|
size_t num_errors = 0;
|
2019-07-30 18:48:40 +00:00
|
|
|
|
|
|
|
BlockMissingValues block_missing_values;
|
2018-05-24 01:02:16 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|