2021-11-09 13:14:07 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
|
|
|
|
#include <Formats/ParsedTemplateFormatString.h>
|
|
|
|
#include <IO/PeekableReadBuffer.h>
|
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-02-06 04:14:01 +00:00
|
|
|
class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes
|
2021-11-09 13:14:07 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
CustomSeparatedRowInputFormat(
|
|
|
|
const Block & header_,
|
|
|
|
ReadBuffer & in_,
|
|
|
|
const Params & params_,
|
|
|
|
bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_settings_);
|
|
|
|
|
|
|
|
void resetParser() override;
|
|
|
|
String getName() const override { return "CustomSeparatedRowInputFormat"; }
|
2021-12-10 17:54:08 +00:00
|
|
|
void setReadBuffer(ReadBuffer & in_) override;
|
|
|
|
|
2021-11-09 13:14:07 +00:00
|
|
|
private:
|
2021-11-15 19:59:24 +00:00
|
|
|
CustomSeparatedRowInputFormat(
|
|
|
|
const Block & header_,
|
2021-12-14 16:08:08 +00:00
|
|
|
std::unique_ptr<PeekableReadBuffer> in_buf_,
|
2021-11-15 19:59:24 +00:00
|
|
|
const Params & params_,
|
|
|
|
bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_settings_);
|
2021-12-15 11:30:57 +00:00
|
|
|
|
|
|
|
bool allowSyncAfterError() const override;
|
|
|
|
void syncAfterError() override;
|
|
|
|
|
|
|
|
std::unique_ptr<PeekableReadBuffer> buf;
|
|
|
|
bool ignore_spaces;
|
|
|
|
};
|
|
|
|
|
2022-02-06 04:14:01 +00:00
|
|
|
class CustomSeparatedFormatReader final : public FormatWithNamesAndTypesReader
|
2021-12-15 11:30:57 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
CustomSeparatedFormatReader(PeekableReadBuffer & buf_, bool ignore_spaces_, const FormatSettings & format_settings_);
|
|
|
|
|
2021-11-09 13:14:07 +00:00
|
|
|
using EscapingRule = FormatSettings::EscapingRule;
|
|
|
|
|
|
|
|
bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override;
|
|
|
|
|
|
|
|
void skipField(size_t /*file_column*/) override { skipField(); }
|
|
|
|
void skipField();
|
|
|
|
void skipNames() override { skipHeaderRow(); }
|
|
|
|
void skipTypes() override { skipHeaderRow(); }
|
|
|
|
void skipHeaderRow();
|
|
|
|
|
|
|
|
void skipPrefixBeforeHeader() override;
|
|
|
|
void skipRowStartDelimiter() override;
|
|
|
|
void skipFieldDelimiter() override;
|
|
|
|
void skipRowEndDelimiter() override;
|
|
|
|
void skipRowBetweenDelimiter() override;
|
|
|
|
|
|
|
|
bool checkForSuffix() override;
|
|
|
|
|
|
|
|
bool parseRowStartWithDiagnosticInfo(WriteBuffer & out) override;
|
|
|
|
bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
|
|
|
|
bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override;
|
|
|
|
bool parseRowBetweenDelimiterWithDiagnosticInfo(WriteBuffer & out) override;
|
|
|
|
bool tryParseSuffixWithDiagnosticInfo(WriteBuffer & out) override;
|
|
|
|
|
|
|
|
std::vector<String> readNames() override { return readHeaderRow(); }
|
|
|
|
std::vector<String> readTypes() override { return readHeaderRow(); }
|
2021-12-15 11:30:57 +00:00
|
|
|
std::vector<String> readHeaderRow() {return readRowImpl<true>(); }
|
|
|
|
|
|
|
|
std::vector<String> readRow() { return readRowImpl<false>(); }
|
2021-11-09 13:14:07 +00:00
|
|
|
|
|
|
|
bool checkEndOfRow();
|
|
|
|
bool checkForSuffixImpl(bool check_eof);
|
2021-12-10 17:54:08 +00:00
|
|
|
inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); }
|
2021-11-09 13:14:07 +00:00
|
|
|
|
2021-12-15 11:30:57 +00:00
|
|
|
EscapingRule getEscapingRule() { return format_settings.custom.escaping_rule; }
|
|
|
|
|
|
|
|
void setReadBuffer(ReadBuffer & in_) override;
|
|
|
|
private:
|
|
|
|
template <bool is_header>
|
|
|
|
std::vector<String> readRowImpl();
|
|
|
|
|
|
|
|
template <bool read_string>
|
|
|
|
String readFieldIntoString(bool is_first);
|
|
|
|
|
|
|
|
PeekableReadBuffer * buf;
|
2021-11-09 13:14:07 +00:00
|
|
|
bool ignore_spaces;
|
2021-12-15 11:30:57 +00:00
|
|
|
size_t columns = 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
class CustomSeparatedSchemaReader : public FormatWithNamesAndTypesSchemaReader
|
|
|
|
{
|
|
|
|
public:
|
2022-03-24 12:54:12 +00:00
|
|
|
CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_);
|
2021-12-15 11:30:57 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
DataTypes readRowAndGetDataTypes() override;
|
|
|
|
|
|
|
|
PeekableReadBuffer buf;
|
|
|
|
CustomSeparatedFormatReader reader;
|
|
|
|
bool first_row = true;
|
2021-11-09 13:14:07 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|