Fix schema inference for JSONEachRow and JSONCompactEachRow

This commit is contained in:
avogar 2022-01-20 16:31:24 +03:00
parent f644602ec8
commit 67e396f8f4
6 changed files with 48 additions and 1 deletion

View File

@ -188,6 +188,16 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(ReadBuffer
DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes()
{
if (first_row)
first_row = false;
else
{
skipWhitespaceIfAny(in);
/// ',' and ';' are possible between the rows.
if (!in.eof() && (*in.position() == ',' || *in.position() == ';'))
++in.position();
}
skipWhitespaceIfAny(in);
if (in.eof())
return {};

View File

@ -81,6 +81,7 @@ private:
DataTypes readRowAndGetDataTypes() override;
JSONCompactEachRowFormatReader reader;
bool first_row = true;
};
}

View File

@ -318,9 +318,24 @@ std::unordered_map<String, DataTypePtr> JSONEachRowSchemaReader::readRowAndGetNa
{
skipBOMIfExists(in);
skipWhitespaceIfAny(in);
checkChar('[', in);
if (checkChar('[', in))
data_in_square_brackets = true;
first_row = false;
}
else
{
skipWhitespaceIfAny(in);
/// If data is in square brackets then ']' means the end of data.
if (data_in_square_brackets && checkChar(']', in))
return {};
/// ';' means end of data.
if (checkChar(';', in))
return {};
/// There may be optional ',' between rows.
checkChar(',', in);
}
skipWhitespaceIfAny(in);
if (in.eof())

View File

@ -95,6 +95,7 @@ private:
bool json_strings;
bool first_row = true;
bool data_in_square_brackets = false;
};
}

View File

@ -0,0 +1,6 @@
0
1
2
0
1
2

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Tags: no-parallel

# Checks that schema inference works for JSONEachRow and JSONCompactEachRow
# input when rows are separated by optional ',' / ';' delimiters and, for
# JSONEachRow, when the whole data set is wrapped in square brackets.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Single scratch file reused by both cases.
# NOTE(review): CLICKHOUSE_TMP and CLICKHOUSE_LOCAL are presumably exported by
# shell_config.sh - confirm against that file.
DATA_FILE="$CLICKHOUSE_TMP/02182_data"

# JSONEachRow: rows inside '[' ... ']' separated by ',' with arbitrary whitespace.
echo '[{"number":"0"} ,{"number":"1"} , {"number":"2"}]' > "$DATA_FILE"
# $CLICKHOUSE_LOCAL is intentionally left unquoted: it may expand to a command
# followed by arguments and therefore has to undergo word splitting.
$CLICKHOUSE_LOCAL -q "SELECT * FROM table" --file "$DATA_FILE" --input-format JSONEachRow

# JSONCompactEachRow: rows separated by either ',' or ';'.
echo '["0"] ,["1"] ; ["2"]' > "$DATA_FILE"
$CLICKHOUSE_LOCAL -q "SELECT * FROM table" --file "$DATA_FILE" --input-format JSONCompactEachRow

# Do not leave the scratch file behind for subsequent tests.
rm -f "$DATA_FILE"