Merge pull request #35332 from Avogar/fix-tskv-schema-inference

Fix schema inference for TSKV format when using a small max_read_buffer_size
This commit is contained in:
Kruglov Pavel 2022-03-23 18:37:07 +01:00 committed by GitHub
commit 826b933b08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 34 additions and 4 deletions

View File

@ -4,7 +4,6 @@
#include <Formats/EscapingRuleUtils.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
@ -242,15 +241,16 @@ std::unordered_map<String, DataTypePtr> TSKVSchemaReader::readRowAndGetNamesAndD
std::unordered_map<String, DataTypePtr> names_and_types;
StringRef name_ref;
String name_tmp;
String name_buf;
String value;
do
{
bool has_value = readName(in, name_ref, name_tmp);
bool has_value = readName(in, name_ref, name_buf);
String name = String(name_ref);
if (has_value)
{
readEscapedString(value, in);
names_and_types[String(name_ref)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped);
names_and_types[std::move(name)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped);
}
else
{

View File

@ -0,0 +1,8 @@
b Nullable(String)
c Nullable(String)
a Nullable(String)
s1 \N 1
} [2] 2
\N \N \N
\N \N \N
\N [3] \N

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
# Runs `desc` and `select` over a small TSKV file with --max_read_buffer_size=4,
# so both schema inference and reading happen with a very small read buffer.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# Query a file that does not exist and take the 9th whitespace-separated field of the
# resulting Exception line, with the "/nonexist.txt" suffix stripped, as the directory
# that file() resolves relative paths against.
USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
FILE_NAME=test_02240.data
# ${VAR:?} aborts the script when the path could not be determined, so nothing is
# ever written to "/$FILE_NAME".
DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME
# Quote the path: an unquoted expansion would be word-split if the directory contains spaces.
touch "$DATA_FILE"
# Rows list the keys a, b, c in different orders and omit some of them.
echo -e 'a=1\tb=s1\tc=\N
c=[2]\ta=2\tb=\N}
a=\N
c=[3]\ta=\N' > "$DATA_FILE"
$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "desc file('$FILE_NAME', 'TSKV')"
$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "select * from file('$FILE_NAME', 'TSKV')"
# Remove the data file so it does not linger in the user_files directory after the test.
rm -f "$DATA_FILE"