2020-02-14 19:48:45 +00:00
|
|
|
#pragma once
|
|
|
|
|
2020-02-21 15:21:31 +00:00
|
|
|
#include <re2/re2.h>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2020-02-14 19:48:45 +00:00
|
|
|
#include <Core/Block.h>
|
|
|
|
#include <Processors/Formats/IRowInputFormat.h>
|
|
|
|
#include <Formats/FormatSettings.h>
|
|
|
|
#include <Formats/FormatFactory.h>
|
2020-02-21 15:21:31 +00:00
|
|
|
#include <IO/PeekableReadBuffer.h>
|
2020-02-14 19:48:45 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
class ReadBuffer;
|
|
|
|
|
|
|
|
|
|
|
|
class RegexpRowInputFormat : public IRowInputFormat
|
|
|
|
{
|
|
|
|
public:
|
2020-02-18 13:53:12 +00:00
|
|
|
RegexpRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_);
|
2020-02-14 19:48:45 +00:00
|
|
|
|
|
|
|
String getName() const override { return "RegexpRowInputFormat"; }
|
|
|
|
|
|
|
|
bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
enum class FieldFormat
|
|
|
|
{
|
|
|
|
Escaped,
|
|
|
|
Quoted,
|
|
|
|
Csv,
|
|
|
|
Json,
|
|
|
|
};
|
|
|
|
|
|
|
|
bool readField(size_t index, MutableColumns & columns);
|
|
|
|
void readFieldsFromMatch(MutableColumns & columns, RowReadExtension & ext);
|
|
|
|
FieldFormat stringToFormat(const String & format);
|
|
|
|
|
2020-02-21 15:21:31 +00:00
|
|
|
PeekableReadBuffer buf;
|
2020-02-14 19:48:45 +00:00
|
|
|
const FormatSettings format_settings;
|
|
|
|
FieldFormat field_format;
|
2020-02-21 15:21:31 +00:00
|
|
|
|
|
|
|
RE2 regexp;
|
|
|
|
// The vector of fields extracted from line using regexp.
|
|
|
|
std::vector<std::string> matched_fields;
|
|
|
|
// These two vectors are needed to use RE2::FullMatchN (function for extracting fields).
|
|
|
|
std::vector<RE2::Arg> re2_arguments;
|
|
|
|
std::vector<RE2::Arg *> re2_arguments_ptrs;
|
2020-02-14 19:48:45 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|