2011-08-09 19:19:00 +00:00
|
|
|
#pragma once
|
2010-05-21 19:53:44 +00:00
|
|
|
|
2019-04-22 13:31:17 +00:00
|
|
|
#include <optional>
|
|
|
|
#include <unordered_map>
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/Block.h>
|
2018-06-10 19:22:49 +00:00
|
|
|
#include <Formats/FormatSettings.h>
|
|
|
|
#include <Formats/IRowInputStream.h>
|
2010-05-21 19:53:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
/// Forward declaration: this header only refers to ReadBuffer by reference
/// (constructor argument and stored member), so the full definition is not needed here.
class ReadBuffer;
|
|
|
|
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** A stream to input data in tsv format.
|
2010-05-21 19:53:44 +00:00
|
|
|
*/
|
|
|
|
class TabSeparatedRowInputStream : public IRowInputStream
|
|
|
|
{
|
|
|
|
public:
|
2017-05-13 22:19:04 +00:00
|
|
|
/** with_names - the first line is the header with the names of the columns
|
|
|
|
* with_types - on the next line header with type names
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
2018-06-08 01:51:55 +00:00
|
|
|
TabSeparatedRowInputStream(
|
|
|
|
ReadBuffer & istr_, const Block & header_, bool with_names_, bool with_types_, const FormatSettings & format_settings);
|
2010-05-21 19:53:44 +00:00
|
|
|
|
2019-04-22 13:31:17 +00:00
|
|
|
bool read(MutableColumns & columns, RowReadExtension & ext) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
void readPrefix() override;
|
2018-06-03 16:51:31 +00:00
|
|
|
bool allowSyncAfterError() const override { return true; }
|
2017-04-01 07:20:54 +00:00
|
|
|
void syncAfterError() override;
|
2017-01-27 04:29:47 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string getDiagnosticInfo() override;
|
2010-05-21 19:53:44 +00:00
|
|
|
|
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
ReadBuffer & istr;
|
2017-12-18 02:43:40 +00:00
|
|
|
Block header;
|
2017-04-01 07:20:54 +00:00
|
|
|
bool with_names;
|
|
|
|
bool with_types;
|
2018-06-08 01:51:55 +00:00
|
|
|
const FormatSettings format_settings;
|
2017-04-01 07:20:54 +00:00
|
|
|
DataTypes data_types;
|
2015-03-29 07:13:38 +00:00
|
|
|
|
2019-04-22 13:31:17 +00:00
|
|
|
using IndexesMap = std::unordered_map<String, size_t>;
|
|
|
|
IndexesMap column_indexes_by_names;
|
|
|
|
|
|
|
|
using OptionalIndexes = std::vector<std::optional<size_t>>;
|
2019-04-22 14:00:51 +00:00
|
|
|
OptionalIndexes column_indexes_for_input_fields;
|
2019-04-22 13:31:17 +00:00
|
|
|
|
|
|
|
std::vector<UInt8> read_columns;
|
|
|
|
std::vector<size_t> columns_to_fill_with_default_values;
|
|
|
|
|
|
|
|
void addInputColumn(const String & column_name);
|
|
|
|
void setupAllColumnsByTableSchema();
|
|
|
|
void fillUnreadColumnsWithDefaults(MutableColumns & columns, RowReadExtension& ext);
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// For convenient diagnostics in case of an error.
|
2015-03-29 07:13:38 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t row_num = 0;
|
2015-03-29 07:13:38 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// How many bytes were read, not counting those still in the buffer.
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t bytes_read_at_start_of_buffer_on_current_row = 0;
|
|
|
|
size_t bytes_read_at_start_of_buffer_on_prev_row = 0;
|
2015-03-29 07:13:38 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
char * pos_of_current_row = nullptr;
|
|
|
|
char * pos_of_prev_row = nullptr;
|
2015-03-29 07:13:38 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void updateDiagnosticInfo();
|
2015-03-29 07:13:38 +00:00
|
|
|
|
2017-12-14 20:58:18 +00:00
|
|
|
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
|
2017-04-01 07:20:54 +00:00
|
|
|
WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name);
|
2010-05-21 19:53:44 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|