mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Remove code duplication, use simdjson and rapidjson instead of Poco
This commit is contained in:
parent
e3dbfe6bf6
commit
26abf7aa62
@ -526,6 +526,14 @@ if (USE_BZIP2)
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
# Link the dbms target against simdjson when the build enables USE_SIMDJSON.
if(USE_SIMDJSON)
|
||||
dbms_target_link_libraries(PRIVATE simdjson)
|
||||
endif()
|
||||
|
||||
# Add the rapidjson include directory (as a SYSTEM include) when the build
# enables USE_RAPIDJSON; only an include path is added here, no link step.
if(USE_RAPIDJSON)
|
||||
dbms_target_include_directories(SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
dbms_target_link_libraries(PUBLIC consistent-hashing)
|
||||
|
||||
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
|
||||
|
@ -288,13 +288,7 @@ DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSe
|
||||
return parsed ? type : nullptr;
|
||||
}
|
||||
case FormatSettings::EscapingRule::JSON:
|
||||
{
|
||||
Poco::JSON::Parser parser;
|
||||
String json = "{\"field\" : " + field + "}";
|
||||
auto var = parser.parse(json);
|
||||
Poco::JSON::Object::Ptr object = var.extract<Poco::JSON::Object::Ptr>();
|
||||
return getDataTypeFromJSONField(object->get("field"));
|
||||
}
|
||||
return getDataTypeFromJSONField(field);
|
||||
case FormatSettings::EscapingRule::CSV:
|
||||
{
|
||||
if (field.empty() || field == format_settings.csv.null_representation)
|
||||
|
@ -9,10 +9,14 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <Poco/JSON/Parser.h>
|
||||
#include <Functions/SimdJSONParser.h>
|
||||
#include <Functions/RapidJSONParser.h>
|
||||
#include <Functions/DummyJSONParser.h>
|
||||
|
||||
#include <base/find_symbols.h>
|
||||
|
||||
#include <base/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -34,7 +38,7 @@ static std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer
|
||||
while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast<size_t>(pos - in.position()) < min_chunk_size || number_of_rows < min_rows))
|
||||
{
|
||||
const auto current_object_size = memory.size() + static_cast<size_t>(pos - in.position());
|
||||
if (current_object_size > 10 * min_chunk_size)
|
||||
if (min_chunk_size != 0 && current_object_size > 10 * min_chunk_size)
|
||||
throw ParsingException("Size of JSON object is extremely large. Expected not greater than " +
|
||||
std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) +
|
||||
" bytes per row. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely JSON is malformed", ErrorCodes::INCORRECT_DATA);
|
||||
@ -103,87 +107,20 @@ static std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer
|
||||
template <const char opening_bracket, const char closing_bracket>
|
||||
static String readJSONEachRowLineIntoStringImpl(ReadBuffer & in)
|
||||
{
|
||||
skipWhitespaceIfAny(in);
|
||||
|
||||
if (in.eof())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot read JSON object: unexpected end of file");
|
||||
|
||||
char * pos = in.position();
|
||||
if (*pos != opening_bracket)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot read JSONEachRow line: {} expected, {} got", opening_bracket, *in.position());
|
||||
++pos;
|
||||
|
||||
Memory memory;
|
||||
size_t balance = 1;
|
||||
bool quotes = false;
|
||||
while (loadAtPosition(in, memory, pos) && balance)
|
||||
{
|
||||
if (quotes)
|
||||
{
|
||||
pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end());
|
||||
|
||||
if (pos == in.buffer().end())
|
||||
continue;
|
||||
|
||||
if (*pos == '\\')
|
||||
{
|
||||
++pos;
|
||||
if (loadAtPosition(in, memory, pos))
|
||||
++pos;
|
||||
}
|
||||
else if (*pos == '"')
|
||||
{
|
||||
++pos;
|
||||
quotes = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pos = find_first_symbols<opening_bracket, closing_bracket, '\\', '"'>(pos, in.buffer().end());
|
||||
|
||||
if (pos == in.buffer().end())
|
||||
continue;
|
||||
|
||||
else if (*pos == opening_bracket)
|
||||
{
|
||||
++balance;
|
||||
++pos;
|
||||
}
|
||||
else if (*pos == closing_bracket)
|
||||
{
|
||||
--balance;
|
||||
++pos;
|
||||
}
|
||||
else if (*pos == '\\')
|
||||
{
|
||||
++pos;
|
||||
if (loadAtPosition(in, memory, pos))
|
||||
++pos;
|
||||
}
|
||||
else if (*pos == '"')
|
||||
{
|
||||
quotes = true;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (balance)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot read JSON object: unexpected end of file");
|
||||
|
||||
saveUpToPosition(in, memory, pos);
|
||||
fileSegmentationEngineJSONEachRowImpl<opening_bracket, closing_bracket>(in, memory, 0, 1);
|
||||
return String(memory.data(), memory.size());
|
||||
}
|
||||
|
||||
DataTypePtr getDataTypeFromJSONField(const Poco::Dynamic::Var & field)
|
||||
DataTypePtr getDataTypeFromJSONFieldImpl(const SimdJSONParser::Element & field)
|
||||
{
|
||||
if (field.isEmpty())
|
||||
if (field.isNull())
|
||||
return nullptr;
|
||||
|
||||
if (field.isBoolean())
|
||||
if (field.isBool())
|
||||
return makeNullable(std::make_shared<DataTypeUInt8>());
|
||||
|
||||
if (field.isNumeric())
|
||||
if (field.isInt64() || field.isUInt64() || field.isDouble())
|
||||
return makeNullable(std::make_shared<DataTypeFloat64>());
|
||||
|
||||
if (field.isString())
|
||||
@ -191,18 +128,18 @@ DataTypePtr getDataTypeFromJSONField(const Poco::Dynamic::Var & field)
|
||||
|
||||
if (field.isArray())
|
||||
{
|
||||
Poco::JSON::Array::Ptr array = field.extract<Poco::JSON::Array::Ptr>();
|
||||
auto array = field.getArray();
|
||||
|
||||
/// Return nullptr in case of empty array because we cannot determine nested type.
|
||||
if (array->size() == 0)
|
||||
if (array.size() == 0)
|
||||
return nullptr;
|
||||
|
||||
DataTypes nested_data_types;
|
||||
/// If this array contains fields with different types we will treat it as Tuple.
|
||||
bool is_tuple = false;
|
||||
for (size_t i = 0; i != array->size(); ++i)
|
||||
for (const auto element : array)
|
||||
{
|
||||
auto type = getDataTypeFromJSONField(array->get(i));
|
||||
auto type = getDataTypeFromJSONFieldImpl(element);
|
||||
if (!type)
|
||||
return nullptr;
|
||||
|
||||
@ -218,14 +155,13 @@ DataTypePtr getDataTypeFromJSONField(const Poco::Dynamic::Var & field)
|
||||
return std::make_shared<DataTypeArray>(nested_data_types.back());
|
||||
}
|
||||
|
||||
if (field.type() == typeid(Poco::JSON::Object::Ptr))
|
||||
if (field.isObject())
|
||||
{
|
||||
Poco::JSON::Object::Ptr object = field.extract<Poco::JSON::Object::Ptr>();
|
||||
auto names = object->getNames();
|
||||
auto object = field.getObject();
|
||||
DataTypePtr value_type;
|
||||
for (const auto & name : names)
|
||||
for (const auto key_value_pair : object)
|
||||
{
|
||||
auto type = getDataTypeFromJSONField(object->get(name));
|
||||
auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second);
|
||||
if (!type)
|
||||
return nullptr;
|
||||
|
||||
@ -237,23 +173,45 @@ DataTypePtr getDataTypeFromJSONField(const Poco::Dynamic::Var & field)
|
||||
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_type);
|
||||
}
|
||||
|
||||
throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type {}", field.type().name()};
|
||||
throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"};
|
||||
}
|
||||
|
||||
using JSONEachRowFieldExtractor = std::function<std::vector<Poco::Dynamic::Var>(const Poco::Dynamic::Var &)>;
|
||||
|
||||
template <const char opening_bracket, const char closing_bracket>
|
||||
static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, JSONEachRowFieldExtractor extractor)
|
||||
auto getJSONParserAndElement()
|
||||
{
|
||||
Poco::JSON::Parser parser;
|
||||
DataTypes data_types;
|
||||
#if USE_SIMDJSON
|
||||
return std::pair<SimdJSONParser, SimdJSONParser::Element>();
|
||||
#elif USE_RAPIDJSON
|
||||
return std::pair<RapidJSONParser, RapidJSONParser::Element>();
|
||||
#else
|
||||
return std::pair<DummyJSONParser, DummyJSONParser::Element>();
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Parse `field` as JSON text and infer a data type for the parsed value.
/// The parser/element pair comes from getJSONParserAndElement(); the type
/// inference itself is delegated to getDataTypeFromJSONFieldImpl().
/// Throws Exception with ErrorCodes::INCORRECT_DATA if the parser reports failure.
DataTypePtr getDataTypeFromJSONField(const String & field)
|
||||
{
|
||||
auto [parser, element] = getJSONParserAndElement();
|
||||
/// parse() fills `element` and returns false when `field` could not be parsed.
bool parsed = parser.parse(field, element);
|
||||
if (!parsed)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object");
|
||||
|
||||
return getDataTypeFromJSONFieldImpl(element);
|
||||
}
|
||||
|
||||
template <class Extractor, const char opening_bracket, const char closing_bracket>
|
||||
static DataTypes determineColumnDataTypesFromJSONEachRowDataImpl(ReadBuffer & in, bool /*json_strings*/, Extractor & extractor)
|
||||
{
|
||||
String line = readJSONEachRowLineIntoStringImpl<opening_bracket, closing_bracket>(in);
|
||||
auto var = parser.parse(line);
|
||||
std::vector<Poco::Dynamic::Var> fields = extractor(var);
|
||||
auto [parser, element] = getJSONParserAndElement();
|
||||
bool parsed = parser.parse(line, element);
|
||||
if (!parsed)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse JSON object");
|
||||
|
||||
auto fields = extractor.extract(element);
|
||||
|
||||
DataTypes data_types;
|
||||
data_types.reserve(fields.size());
|
||||
for (const auto & field : fields)
|
||||
data_types.push_back(getDataTypeFromJSONField(field));
|
||||
data_types.push_back(getDataTypeFromJSONFieldImpl(field));
|
||||
|
||||
/// TODO: For JSONStringsEachRow/JSONCompactStringsEach all types will be strings.
|
||||
/// Should we try to parse data inside strings somehow in this case?
|
||||
@ -271,43 +229,57 @@ std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in
|
||||
return fileSegmentationEngineJSONEachRowImpl<'[', ']'>(in, memory, min_chunk_size, min_rows);
|
||||
}
|
||||
|
||||
/// Extractor for rows shaped as a JSON object: collects the values of the object
/// and remembers the corresponding keys in `column_names`.
struct JSONEachRowFieldsExtractor
|
||||
{
|
||||
/// Returns the values of the object held by `element`, in iteration order.
/// As a side effect appends every key to `column_names` in the same order,
/// so names and returned fields line up index by index.
/// NOTE(review): `column_names` is only appended to, never cleared here, so
/// calling extract() twice on one instance accumulates names from both calls.
template <class Element>
|
||||
std::vector<Element> extract(const Element & element)
|
||||
{
|
||||
/// {..., "<column_name>" : <value>, ...}
|
||||
auto object = element.getObject();
|
||||
std::vector<Element> fields;
|
||||
fields.reserve(object.size());
|
||||
column_names.reserve(object.size());
|
||||
for (const auto & key_value_pair : object)
|
||||
{
|
||||
column_names.emplace_back(key_value_pair.first);
|
||||
fields.push_back(key_value_pair.second);
|
||||
}
|
||||
|
||||
return fields;
|
||||
}
|
||||
|
||||
/// Keys seen by extract(), in the order they were iterated.
std::vector<String> column_names;
|
||||
};
|
||||
|
||||
std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings)
|
||||
{
|
||||
std::vector<String> column_names;
|
||||
|
||||
/// {..., "<column_name>" : <value>, ...}
|
||||
auto extractor = [&](const Poco::Dynamic::Var & var)
|
||||
{
|
||||
Poco::JSON::Object::Ptr object = var.extract<Poco::JSON::Object::Ptr>();
|
||||
column_names = object->getNames();
|
||||
|
||||
std::vector<Poco::Dynamic::Var> fields;
|
||||
for (size_t i = 0; i != object->size(); ++i)
|
||||
fields.push_back(object->get(column_names[i]));
|
||||
return fields;
|
||||
};
|
||||
|
||||
auto data_types = determineColumnDataTypesFromJSONEachRowDataImpl<'{', '}'>(in, json_strings, extractor);
|
||||
JSONEachRowFieldsExtractor extractor;
|
||||
auto data_types = determineColumnDataTypesFromJSONEachRowDataImpl<JSONEachRowFieldsExtractor, '{', '}'>(in, json_strings, extractor);
|
||||
std::unordered_map<String, DataTypePtr> result;
|
||||
for (size_t i = 0; i != column_names.size(); ++i)
|
||||
result[column_names[i]] = data_types[i];
|
||||
for (size_t i = 0; i != extractor.column_names.size(); ++i)
|
||||
result[extractor.column_names[i]] = data_types[i];
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Extractor for rows shaped as a JSON array: every array item is one field.
/// Unlike the object-based extractor it records no column names.
struct JSONCompactEachRowFieldsExtractor
|
||||
{
|
||||
/// Returns the items of the array held by `element`, in index order.
template <class Element>
|
||||
std::vector<Element> extract(const Element & element)
|
||||
{
|
||||
/// [..., <value>, ...]
|
||||
auto array = element.getArray();
|
||||
std::vector<Element> fields;
|
||||
fields.reserve(array.size());
|
||||
/// Items are copied one by one via operator[] on the array.
for (size_t i = 0; i != array.size(); ++i)
|
||||
fields.push_back(array[i]);
|
||||
return fields;
|
||||
}
|
||||
};
|
||||
|
||||
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, bool json_strings)
|
||||
{
|
||||
/// [..., <value>, ...]
|
||||
auto extractor = [](const Poco::Dynamic::Var & var)
|
||||
{
|
||||
Poco::JSON::Array::Ptr array = var.extract<Poco::JSON::Array::Ptr>();
|
||||
std::vector<Poco::Dynamic::Var> fields;
|
||||
fields.reserve(array->size());
|
||||
for (size_t i = 0; i != array->size(); ++i)
|
||||
fields.push_back(array->get(i));
|
||||
return fields;
|
||||
};
|
||||
|
||||
return determineColumnDataTypesFromJSONEachRowDataImpl<'[', ']'>(in, json_strings, extractor);
|
||||
JSONCompactEachRowFieldsExtractor extractor;
|
||||
return determineColumnDataTypesFromJSONEachRowDataImpl<JSONCompactEachRowFieldsExtractor, '[', ']'>(in, json_strings, extractor);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,9 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include "config_formats.h"
|
||||
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <Poco/JSON/Object.h>
|
||||
#include <utility>
|
||||
|
||||
namespace DB
|
||||
@ -13,10 +14,10 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::M
|
||||
std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size, size_t min_rows);
|
||||
|
||||
|
||||
/// Convert JSON type to ClickHouse type. Make the result type always Nullable.
|
||||
/// Parse JSON from string and convert its type to ClickHouse type. Make the result type always Nullable.
|
||||
/// JSON array with different nested types is treated as Tuple.
|
||||
/// If cannot convert (for example when field contains null), return nullptr.
|
||||
DataTypePtr getDataTypeFromJSONField(const Poco::Dynamic::Var & field);
|
||||
DataTypePtr getDataTypeFromJSONField(const String & field);
|
||||
|
||||
/// Read row in JSONEachRow format and try to determine type for each field.
|
||||
/// Return map {column_name : type}.
|
||||
|
@ -10,4 +10,5 @@
|
||||
#cmakedefine01 USE_ARROW
|
||||
#cmakedefine01 USE_PROTOBUF
|
||||
#cmakedefine01 USE_MSGPACK
|
||||
|
||||
#cmakedefine01 USE_SIMDJSON
|
||||
#cmakedefine01 USE_RAPIDJSON
|
||||
|
@ -55,12 +55,12 @@ NamesAndTypesList IRowSchemaReader::readSchema()
|
||||
if (data_types.empty())
|
||||
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot read rows from the data");
|
||||
|
||||
/// If column names weren't set, use default names 'column_1', 'column_2', ...
|
||||
/// If column names weren't set, use default names 'c1', 'c2', ...
|
||||
if (column_names.empty())
|
||||
{
|
||||
column_names.reserve(data_types.size());
|
||||
for (size_t i = 0; i != data_types.size(); ++i)
|
||||
column_names.push_back("column_" + std::to_string(i + 1));
|
||||
column_names.push_back("c" + std::to_string(i + 1));
|
||||
}
|
||||
/// If column names were set, check that the number of names match the number of types.
|
||||
else if (column_names.size() != data_types.size())
|
||||
|
@ -88,9 +88,13 @@ static size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
|
||||
}
|
||||
|
||||
static void getFileReaderAndSchema(
|
||||
ReadBuffer & in, std::unique_ptr<arrow::adapters::orc::ORCFileReader> & file_reader, std::shared_ptr<arrow::Schema> & schema, const FormatSettings & format_settings)
|
||||
ReadBuffer & in,
|
||||
std::unique_ptr<arrow::adapters::orc::ORCFileReader> & file_reader,
|
||||
std::shared_ptr<arrow::Schema> & schema,
|
||||
const FormatSettings & format_settings,
|
||||
std::atomic<int> & is_stopped)
|
||||
{
|
||||
auto arrow_file = asArrowFile(*in, format_settings, is_stopped);
|
||||
auto arrow_file = asArrowFile(in, format_settings, is_stopped);
|
||||
if (is_stopped)
|
||||
return;
|
||||
|
||||
@ -108,7 +112,9 @@ static void getFileReaderAndSchema(
|
||||
void ORCBlockInputFormat::prepareReader()
|
||||
{
|
||||
std::shared_ptr<arrow::Schema> schema;
|
||||
getFileReaderAndSchema(*in, file_reader, schema, format_settings);
|
||||
getFileReaderAndSchema(*in, file_reader, schema, format_settings, is_stopped);
|
||||
if (is_stopped)
|
||||
return;
|
||||
|
||||
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "ORC", format_settings.orc.import_nested);
|
||||
|
||||
@ -143,7 +149,8 @@ NamesAndTypesList ORCSchemaReader::readSchema()
|
||||
{
|
||||
std::unique_ptr<arrow::adapters::orc::ORCFileReader> file_reader;
|
||||
std::shared_ptr<arrow::Schema> schema;
|
||||
getFileReaderAndSchema(in, file_reader, schema, format_settings);
|
||||
std::atomic<int> is_stopped = 0;
|
||||
getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped);
|
||||
auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "ORC");
|
||||
return header.getNamesAndTypesList();
|
||||
}
|
||||
|
@ -117,9 +117,6 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
static Strings listFilesWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match);
|
||||
|
||||
|
||||
StorageHDFS::StorageHDFS(
|
||||
const String & uri_,
|
||||
const StorageID & table_id_,
|
||||
@ -187,14 +184,8 @@ class HDFSSource::DisclosedGlobIterator::Impl
|
||||
public:
|
||||
Impl(ContextPtr context_, const String & uri)
|
||||
{
|
||||
const size_t begin_of_path = uri.find('/', uri.find("//") + 2);
|
||||
const String path_from_uri = uri.substr(begin_of_path);
|
||||
const String uri_without_path = uri.substr(0, begin_of_path); /// ends without '/'
|
||||
|
||||
HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef());
|
||||
HDFSFSPtr fs = createHDFSFS(builder.get());
|
||||
|
||||
uris = listFilesWithRegexpMatching("/", fs, path_from_uri);
|
||||
const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
|
||||
uris = getPathsList(path_from_uri, uri_without_path, context_);
|
||||
for (auto & elem : uris)
|
||||
elem = uri_without_path + elem;
|
||||
uris_iter = uris.begin();
|
||||
|
@ -1,8 +1,8 @@
|
||||
TSV
|
||||
column_1 Nullable(String)
|
||||
column_2 Nullable(String)
|
||||
column_3 Nullable(String)
|
||||
column_4 Nullable(String)
|
||||
c1 Nullable(String)
|
||||
c2 Nullable(String)
|
||||
c3 Nullable(String)
|
||||
c4 Nullable(String)
|
||||
42 Some string [1, 2, 3, 4] (1, 2, 3)
|
||||
42 abcd [] (4, 5, 6)
|
||||
TSVWithNames
|
||||
@ -13,18 +13,18 @@ tuple Nullable(String)
|
||||
42 Some string [1, 2, 3, 4] (1, 2, 3)
|
||||
42 abcd [] (4, 5, 6)
|
||||
CSV
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
column_4 Array(Nullable(Float64))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
c4 Array(Nullable(Float64))
|
||||
\N Some string [([1,2.3],'String'),([],NULL)] [1,NULL,3]
|
||||
42 \N [([1,2.3],'String'),([3],'abcd')] [4,5,6]
|
||||
column_1 Nullable(String)
|
||||
column_2 Nullable(String)
|
||||
c1 Nullable(String)
|
||||
c2 Nullable(String)
|
||||
42 String
|
||||
String 42
|
||||
column_1 Nullable(String)
|
||||
column_2 Nullable(String)
|
||||
c1 Nullable(String)
|
||||
c2 Nullable(String)
|
||||
\N [NULL, NULL]
|
||||
\N []
|
||||
CSVWithNames
|
||||
@ -35,15 +35,15 @@ d Array(Nullable(Float64))
|
||||
\N Some string [([1,2.3],'String'),([],NULL)] [1,NULL,3]
|
||||
42 \N [([1,2.3],'String'),([3],'abcd')] [4,5,6]
|
||||
JSONCompactEachRow
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Array(Tuple(Nullable(Float64), Nullable(String)))
|
||||
column_3 Map(String, Nullable(Float64))
|
||||
column_4 Nullable(UInt8)
|
||||
c1 Nullable(Float64)
|
||||
c2 Array(Tuple(Nullable(Float64), Nullable(String)))
|
||||
c3 Map(String, Nullable(Float64))
|
||||
c4 Nullable(UInt8)
|
||||
42.42 [(1,'String'),(2,'abcd')] {'key':42,'key2':24} 1
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Array(Tuple(Nullable(Float64), Nullable(String)))
|
||||
column_3 Map(String, Nullable(Float64))
|
||||
column_4 Nullable(UInt8)
|
||||
c1 Nullable(Float64)
|
||||
c2 Array(Tuple(Nullable(Float64), Nullable(String)))
|
||||
c3 Map(String, Nullable(Float64))
|
||||
c4 Nullable(UInt8)
|
||||
\N [(1,'String'),(2,NULL)] {'key':NULL,'key2':24} \N
|
||||
32 [(2,'String 2'),(3,'hello')] {'key3':4242,'key4':2424} 1
|
||||
JSONCompactEachRowWithNames
|
||||
@ -82,60 +82,60 @@ s1 \N 1
|
||||
\N \N \N
|
||||
\N [3] \N
|
||||
Values
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Nullable(Float64))
|
||||
column_4 Tuple(Nullable(Float64), Nullable(String))
|
||||
column_5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Nullable(Float64))
|
||||
c4 Tuple(Nullable(Float64), Nullable(String))
|
||||
c5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))
|
||||
42.42 Some string [1,2,3] (1,'2') ([1,2],[(3,'4'),(5,'6')])
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Nullable(Float64))
|
||||
column_4 Tuple(Nullable(Float64), Nullable(Float64))
|
||||
column_5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Nullable(Float64))
|
||||
c4 Tuple(Nullable(Float64), Nullable(Float64))
|
||||
c5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(String))))
|
||||
42.42 \N [1,NULL,3] (1,NULL) ([1,2],[(3,'4'),(5,'6')])
|
||||
\N Some string [10] (1,2) ([],[])
|
||||
Regexp
|
||||
column_1 Nullable(String)
|
||||
column_2 Nullable(String)
|
||||
column_3 Nullable(String)
|
||||
c1 Nullable(String)
|
||||
c2 Nullable(String)
|
||||
c3 Nullable(String)
|
||||
42 Some string 1 [([1, 2, 3], String 1), ([], String 1)]
|
||||
2 Some string 2 [([4, 5, 6], String 2), ([], String 2)]
|
||||
312 Some string 3 [([1, 2, 3], String 2), ([], String 2)]
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
42 Some string 1 [([1,2,3],'String 1'),([],'String 1')]
|
||||
3 Some string 2 [([3,5,1],'String 2'),([],'String 2')]
|
||||
244 Some string 3 [([],'String 3'),([],'String 3')]
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
42 Some string 1 [([1,2,3],'String 1'),([],'String 1')]
|
||||
2 Some string 2 [([],'String 2'),([],'String 2')]
|
||||
43 Some string 3 [([1,5,3],'String 3'),([],'String 3')]
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
|
||||
52 Some string 2 [([],'String 2'),([1],'String 2')]
|
||||
24 Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
|
||||
CustomSeparated
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
|
||||
42 Some string 2 [([],'String 2'),([],'String 2')]
|
||||
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
|
||||
42 Some string 2 [([],'String 2'),([],'String 2')]
|
||||
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
|
||||
column_1 Nullable(Float64)
|
||||
column_2 Nullable(String)
|
||||
column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(String)
|
||||
c3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
42.42 Some string 1 [([1,2,3],'String 1'),([1],'String 1')]
|
||||
42 Some string 2 [([],'String 2'),([],'String 2')]
|
||||
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
|
||||
@ -159,12 +159,12 @@ column_3 Array(Tuple(Array(Nullable(Float64)), Nullable(String)))
|
||||
42 Some string 2 [([],'String 2'),([],'String 2')]
|
||||
\N Some string 3 [([1,2,3],'String 3'),([1],'String 3')]
|
||||
MsgPack
|
||||
column_1 Nullable(Int64)
|
||||
column_2 Nullable(Int64)
|
||||
column_3 Nullable(Float32)
|
||||
column_4 Nullable(String)
|
||||
column_5 Array(Array(Nullable(Int64)))
|
||||
column_6 Map(Int64, Array(Nullable(Int64)))
|
||||
c1 Nullable(Int64)
|
||||
c2 Nullable(Int64)
|
||||
c3 Nullable(Float32)
|
||||
c4 Nullable(String)
|
||||
c5 Array(Array(Nullable(Int64)))
|
||||
c6 Map(Int64, Array(Nullable(Int64)))
|
||||
\N 0 0 Str: 0 [[0,1],[0]] {0:[0,1]}
|
||||
1 \N 1 Str: 1 [[1,2],[1]] {1:[1,2]}
|
||||
\N 2 2 Str: 2 [[2,3],[2]] {2:[2,3]}
|
||||
|
Loading…
Reference in New Issue
Block a user