mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
disable parallel parsing for JSON in square brackets
This commit is contained in:
parent
5956f7400f
commit
47ad022a1f
@ -34,7 +34,7 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction()
|
||||
|
||||
unit.is_last = !have_more_data;
|
||||
unit.status = READY_TO_PARSE;
|
||||
scheduleParserThreadForUnitWithNumber(current_unit_number);
|
||||
scheduleParserThreadForUnitWithNumber(segmentator_ticket_number);
|
||||
++segmentator_ticket_number;
|
||||
|
||||
if (!have_more_data)
|
||||
@ -49,12 +49,13 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction()
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_unit_number)
|
||||
void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_ticket_number)
|
||||
{
|
||||
try
|
||||
{
|
||||
setThreadName("ChunkParser");
|
||||
|
||||
const auto current_unit_number = current_ticket_number % processing_units.size();
|
||||
auto & unit = processing_units[current_unit_number];
|
||||
|
||||
/*
|
||||
@ -64,9 +65,9 @@ void ParallelParsingBlockInputStream::parserThreadFunction(size_t current_unit_n
|
||||
* can use it from multiple threads simultaneously.
|
||||
*/
|
||||
ReadBuffer read_buffer(unit.segment.data(), unit.segment.size(), 0);
|
||||
auto parser = std::make_unique<InputStreamFromInputFormat>(
|
||||
input_processor_creator(read_buffer, header,
|
||||
row_input_format_params, format_settings));
|
||||
auto format = input_processor_creator(read_buffer, header, row_input_format_params, format_settings);
|
||||
format->setCurrentUnitNumber(current_ticket_number);
|
||||
auto parser = std::make_unique<InputStreamFromInputFormat>(std::move(format));
|
||||
|
||||
unit.block_ext.block.clear();
|
||||
unit.block_ext.block_missing_values.clear();
|
||||
|
@ -213,9 +213,9 @@ private:
|
||||
std::deque<ProcessingUnit> processing_units;
|
||||
|
||||
|
||||
void scheduleParserThreadForUnitWithNumber(size_t unit_number)
|
||||
void scheduleParserThreadForUnitWithNumber(size_t ticket_number)
|
||||
{
|
||||
pool.scheduleOrThrowOnError(std::bind(&ParallelParsingBlockInputStream::parserThreadFunction, this, unit_number));
|
||||
pool.scheduleOrThrowOnError(std::bind(&ParallelParsingBlockInputStream::parserThreadFunction, this, ticket_number));
|
||||
}
|
||||
|
||||
void finishAndWait()
|
||||
|
@ -144,9 +144,19 @@ BlockInputStreamPtr FormatFactory::getInput(
|
||||
|
||||
// It doesn't make sense to use parallel parsing with fewer than four threads
|
||||
// (segmentator + two parsers + reader).
|
||||
if (settings.input_format_parallel_parsing
|
||||
&& file_segmentation_engine
|
||||
&& settings.max_threads >= 4)
|
||||
bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4;
|
||||
|
||||
if (parallel_parsing && name == "JSONEachRow")
|
||||
{
|
||||
/// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix()
|
||||
|
||||
/// For JSONEachRow we can safely skip whitespace characters
|
||||
skipWhitespaceIfAny(buf);
|
||||
if (buf.eof() || *buf.position() == '[')
|
||||
parallel_parsing = false; /// Disable it for JSONEachRow if data is in square brackets (see JSONEachRowRowInputFormat)
|
||||
}
|
||||
|
||||
if (parallel_parsing)
|
||||
{
|
||||
const auto & input_getter = getCreators(name).input_processor_creator;
|
||||
if (!input_getter)
|
||||
|
@ -38,6 +38,13 @@ public:
|
||||
static const BlockMissingValues none;
|
||||
return none;
|
||||
}
|
||||
|
||||
/// Returns the number last stored via setCurrentUnitNumber(); the member defaults to 0 when none was set.
size_t getCurrentUnitNumber() const
{
    return current_unit_number;
}
|
||||
/// Stores the number of the chunk this format instance is about to parse.
/// The value is kept as-is and handed back by getCurrentUnitNumber().
void setCurrentUnitNumber(size_t current_unit_number_)
{
    current_unit_number = current_unit_number_;
}
|
||||
|
||||
private:
|
||||
/// Number of the chunk currently being parsed (if parallel parsing is enabled)
|
||||
size_t current_unit_number = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -226,9 +226,11 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi
|
||||
/// then seeking to next ;, or \n would trigger reading of an extra row at the end.
|
||||
|
||||
/// Semicolon is added for convenience as it could be used at end of INSERT query.
|
||||
if (getTotalRows() && !in.eof())
|
||||
bool is_first_row = getCurrentUnitNumber() == 0 && getTotalRows() == 1;
|
||||
if (!in.eof())
|
||||
{
|
||||
if (*in.position() == ',')
|
||||
/// There may be optional ',' (but not before the first row)
|
||||
if (!is_first_row && *in.position() == ',')
|
||||
++in.position();
|
||||
else if (!data_in_square_brackets && *in.position() == ';')
|
||||
{
|
||||
|
@ -1,6 +1,9 @@
|
||||
DROP TABLE IF EXISTS json_square_brackets;
|
||||
CREATE TABLE json_square_brackets (id UInt32, name String) ENGINE = Memory;
|
||||
INSERT INTO json_square_brackets FORMAT JSONEachRow [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}]
|
||||
INSERT INTO json_square_brackets FORMAT JSONEachRow [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}];
|
||||
INSERT INTO json_square_brackets FORMAT JSONEachRow[];
|
||||
INSERT INTO json_square_brackets FORMAT JSONEachRow [ ] ;
|
||||
INSERT INTO json_square_brackets FORMAT JSONEachRow ;
|
||||
|
||||
SELECT * FROM json_square_brackets ORDER BY id;
|
||||
DROP TABLE IF EXISTS json_square_brackets;
|
||||
|
Loading…
Reference in New Issue
Block a user