#pragma once #include #include #include #include #include #include #include #include namespace DB { class ReadBuffer; class RegexpRowInputFormat : public IRowInputFormat { public: RegexpRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_); String getName() const override { return "RegexpRowInputFormat"; } bool readRow(MutableColumns & columns, RowReadExtension & ext) override; private: enum class FieldFormat { Escaped, Quoted, Csv, Json, }; bool readField(size_t index, MutableColumns & columns); void readFieldsFromMatch(MutableColumns & columns, RowReadExtension & ext); FieldFormat stringToFormat(const String & format); PeekableReadBuffer buf; const FormatSettings format_settings; FieldFormat field_format; RE2 regexp; // The vector of fields extracted from line using regexp. std::vector matched_fields; // These two vectors are needed to use RE2::FullMatchN (function for extracting fields). std::vector re2_arguments; std::vector re2_arguments_ptrs; }; }