more tests and fixes

This commit is contained in:
Alexander Tokmakov 2019-08-28 19:46:54 +03:00
parent 93c672aa0b
commit c8096542bf
3 changed files with 61 additions and 11 deletions

View File

@@ -49,7 +49,7 @@ TemplateRowInputFormat::TemplateRowInputFormat(ReadBuffer & in_, const Block & h
}
else
{
if (format.formats[i] == ColumnFormat::None || format.formats[i] == ColumnFormat::Xml || format.formats[i] == ColumnFormat::Raw)
if (format.formats[i] == ColumnFormat::Xml || format.formats[i] == ColumnFormat::Raw)
throw Exception("None, XML and Raw deserialization is not supported", ErrorCodes::INVALID_TEMPLATE_FORMAT);
}
}
@@ -66,11 +66,14 @@ TemplateRowInputFormat::TemplateRowInputFormat(ReadBuffer & in_, const Block & h
std::vector<UInt8> column_in_format(header_.columns(), false);
for (size_t i = 0; i < row_format.columnsCount(); ++i)
{
if (row_format.formats[i] == ColumnFormat::None || row_format.formats[i] == ColumnFormat::Xml || row_format.formats[i] == ColumnFormat::Raw)
if (row_format.formats[i] == ColumnFormat::Xml || row_format.formats[i] == ColumnFormat::Raw)
throw Exception("invalid template format: None, XML and Raw deserialization is not supported", ErrorCodes::INVALID_TEMPLATE_FORMAT);
if (format.format_idx_to_column_idx[i])
if (row_format.format_idx_to_column_idx[i])
{
if (row_format.formats[i] == ColumnFormat::None)
throw Exception("invalid template format: None, XML and Raw deserialization is not supported", ErrorCodes::INVALID_TEMPLATE_FORMAT);
size_t col_idx = *row_format.format_idx_to_column_idx[i];
if (column_in_format[col_idx])
throw Exception("invalid template format: duplicate column " + header_.getColumnsWithTypeAndName()[col_idx].name,
@@ -95,10 +98,10 @@ ReturnType TemplateRowInputFormat::tryReadPrefixOrSuffix(size_t input_part_beg,
skipSpaces();
if constexpr (throw_exception)
assertString(format.delimiters[input_part_end], buf);
assertString(format.delimiters[input_part_beg], buf);
else
{
if (likely(!checkString(format.delimiters[input_part_end], buf)))
if (likely(!checkString(format.delimiters[input_part_beg], buf)))
return ReturnType(false);
}
@@ -219,6 +222,8 @@ void TemplateRowInputFormat::deserializeField(const IDataType & type, IColumn &
void TemplateRowInputFormat::skipField(TemplateRowInputFormat::ColumnFormat col_format)
{
String tmp;
constexpr const char * field_name = "<SKIPPED COLUMN>";
constexpr size_t field_name_len = 16;
try
{
switch (col_format)
@@ -236,7 +241,7 @@ void TemplateRowInputFormat::skipField(TemplateRowInputFormat::ColumnFormat col_
readCSVString(tmp, buf, settings.csv);
break;
case ColumnFormat::Json:
readJSONString(tmp, buf);
skipJSONField(buf, StringRef(field_name, field_name_len));
break;
default:
__builtin_unreachable();

View File

@@ -1,3 +1,4 @@
==== check escaping ====
"qwe,rty","as""df'gh","","zx
cv bn m",123,"2016-01-01"
"as""df'gh","","zx
@@ -6,3 +7,19 @@ cv bn m","qwe,rty",456,"2016-01-02"
cv bn m","qwe,rty","as""df'gh","",789,"2016-01-04"
"","zx
cv bn m","qwe,rty","as""df'gh",9876543210,"2016-01-03"
==== parse json (sophisticated template) ====
"qwe,rty","as""df'gh","","zx
cv bn m",123,"2016-01-01"
"as""df'gh","","zx
cv bn m","qwe,rty",456,"2016-01-02"
"zx
cv bn m","qwe,rty","as""df'gh","",789,"2016-01-04"
"","zx
cv bn m","qwe,rty","as""df'gh",9876543210,"2016-01-03"
==== parse json ====
"","","qwe,rty","",123,"2016-01-01"
"zx
cv bn m","","as""df'gh","",456,"2016-01-02"
"as""df'gh","","zx
cv bn m","",789,"2016-01-04"
"qwe,rty","","","",9876543210,"2016-01-03"

View File

@@ -3,8 +3,12 @@
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template";
$CLICKHOUSE_CLIENT --query="CREATE TABLE template (s1 String, s2 String, s3 String, s4 String, n UInt64, d Date) ENGINE = Memory";
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template1";
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS template2";
$CLICKHOUSE_CLIENT --query="CREATE TABLE template1 (s1 String, s2 String, s3 String, s4 String, n UInt64, d Date) ENGINE = Memory";
$CLICKHOUSE_CLIENT --query="CREATE TABLE template2 (s1 String, s2 String, s3 String, s4 String, n UInt64, d Date) ENGINE = Memory";
echo "==== check escaping ===="
echo "{prefix}
n: 123, s1: qwe,rty , s2: 'as\"df\\'gh', s3: \"\", s4: \"zx
@@ -12,7 +16,31 @@ cv bn m\", d: 2016-01-01 ;
n: 456, s1: as\"df\\'gh , s2: '', s3: \"zx\\ncv\\tbn m\", s4: \"qwe,rty\", d: 2016-01-02 ;
n: 9876543210, s1: , s2: 'zx\\ncv\\tbn m', s3: \"qwe,rty\", s4: \"as\"\"df'gh\", d: 2016-01-03 ;
n: 789, s1: zx\\ncv\\tbn m , s2: 'qwe,rty', s3: \"as\\\"df'gh\", s4: \"\", d: 2016-01-04
$ suffix $" | $CLICKHOUSE_CLIENT --query="INSERT INTO template FORMAT Template SETTINGS format_schema = '{prefix} \n\${data}\n \$\$ suffix \$\$\n', format_schema_rows = 'n:\t\${n:Escaped}, s1:\t\${s1:Escaped}\t, s2:\t\${s2:Quoted}, s3:\t\${s3:JSON}, s4:\t\${s4:CSV}, d:\t\${d:Escaped}\t', format_schema_rows_between_delimiter = ';\n'";
$ suffix $" | $CLICKHOUSE_CLIENT --query="INSERT INTO template1 FORMAT Template SETTINGS \
format_schema = '{prefix} \n\${data}\n \$\$ suffix \$\$\n', \
format_schema_rows = 'n:\t\${n:Escaped}, s1:\t\${s1:Escaped}\t, s2:\t\${s2:Quoted}, s3:\t\${s3:JSON}, s4:\t\${s4:CSV}, d:\t\${d:Escaped}\t', \
format_schema_rows_between_delimiter = ';\n'";
$CLICKHOUSE_CLIENT --query="SELECT * FROM template ORDER BY n FORMAT CSV";
$CLICKHOUSE_CLIENT --query="DROP TABLE template";
$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT CSV";
echo "==== parse json (sophisticated template) ===="
$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT JSON" | $CLICKHOUSE_CLIENT --query="INSERT INTO template2 FORMAT TemplateIgnoreSpaces SETTINGS \
format_schema = '{\${:}\"meta\"\${:}:\${:}[\${:}{\${:}\"name\"\${:}:\${:}\"s1\"\${:},\${:}\"type\"\${:}:\${:}\"String\"\${:}}\${:},\${:}{\${:}\"name\"\${:}:\${:}\"s2\"\${:},\${:}\"type\"\${:}:\${:}\"String\"\${:}}\${:},\${:}{\${:}\"name\"\${:}:\${:}\"s3\"\${:},\${:}\"type\"\${:}:\${:}\"String\"\${:}}\${:},\${:}{\${:}\"name\"\${:}:\${:}\"s4\"\${:},\${:}\"type\"\${:}:\${:}\"String\"\${:}}\${:},\${:}{\${:}\"name\"\${:}:\${:}\"n\"\${:},\${:}\"type\"\${:}:\${:}\"UInt64\"\${:}}\${:},\${:}{\${:}\"name\"\${:}:\${:}\"d\"\${:},\${:}\"type\"\${:}:\${:}\"Date\"\${:}}\${:}]\${:},\${:}\"data\"\${:}:\${:}[\${data}]\${:},\${:}\"rows\"\${:}:\${:}\${:CSV}\${:},\${:}\"statistics\"\${:}:\${:}{\${:}\"elapsed\"\${:}:\${:}\${:CSV}\${:},\${:}\"rows_read\"\${:}:\${:}\${:CSV}\${:},\${:}\"bytes_read\"\${:}:\${:}\${:CSV}\${:}}\${:}}', \
format_schema_rows = '{\${:}\"s1\"\${:}:\${:}\${s1:JSON}\${:},\${:}\"s2\"\${:}:\${:}\${s2:JSON}\${:},\${:}\"s3\"\${:}:\${:}\${s3:JSON}\${:},\${:}\"s4\"\${:}:\${:}\${s4:JSON}\${:},\${:}\"n\"\${:}:\${:}\${n:JSON}\${:},\${:}\"d\"\${:}:\${:}\${d:JSON}\${:}\${:}}', \
format_schema_rows_between_delimiter = ','";
$CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV";
$CLICKHOUSE_CLIENT --query="TRUNCATE TABLE template2";
echo "==== parse json ===="
$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT JSON" | $CLICKHOUSE_CLIENT --query="INSERT INTO template2 FORMAT TemplateIgnoreSpaces SETTINGS \
format_schema = '{\${:}\"meta\"\${:}:\${:JSON},\${:}\"data\"\${:}:\${:}[\${data}]\${:},\${:}\"rows\"\${:}:\${:JSON},\${:}\"statistics\"\${:}:\${:JSON}\${:}}', \
format_schema_rows = '{\${:}\"s1\"\${:}:\${:}\${s3:JSON}\${:},\${:}\"s2\"\${:}:\${:}\${:JSON}\${:},\${:}\"s3\"\${:}:\${:}\${s1:JSON}\${:},\${:}\"s4\"\${:}:\${:}\${:JSON}\${:},\${:}\"n\"\${:}:\${:}\${n:JSON}\${:},\${:}\"d\"\${:}:\${:}\${d:JSON}\${:}\${:}}', \
format_schema_rows_between_delimiter = ','";
$CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV";
$CLICKHOUSE_CLIENT --query="DROP TABLE template1";
$CLICKHOUSE_CLIENT --query="DROP TABLE template2";