Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-22 15:42:02 +00:00

Fix special builds and tests

Commit: 2626880f6e
Parent: 99026efcdc
@@ -18,13 +18,13 @@ namespace ErrorCodes
 namespace
 {

-static const FormatSettings & getFormatSettings()
+const FormatSettings & getFormatSettings()
 {
     static const FormatSettings settings;
     return settings;
 }

-static const std::shared_ptr<SerializationDynamic> & getDynamicSerialization()
+const std::shared_ptr<SerializationDynamic> & getDynamicSerialization()
 {
     static const std::shared_ptr<SerializationDynamic> dynamic_serialization = std::make_shared<SerializationDynamic>();
     return dynamic_serialization;
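The hunk above drops `static` from helper functions that already sit in an unnamed namespace, where internal linkage is implied; stricter special builds flag the redundant keyword. The function-local `static` that does the real work, construct-on-first-use caching, is untouched. A minimal standalone sketch of the distinction, with a stand-in `Settings` struct:

    namespace
    {

    struct Settings { int max_depth = 42; };   // stand-in for FormatSettings

    /// No `static` on the function: the unnamed namespace already gives it
    /// internal linkage. The local `static` below is a different mechanism,
    /// a thread-safe construct-on-first-use singleton.
    const Settings & getSettings()
    {
        static const Settings settings;
        return settings;
    }

    }

    int main()
    {
        return getSettings().max_depth == 42 ? 0 : 1;
    }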
@@ -394,7 +394,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
     const auto & src_object_column = assert_cast<const ColumnObject &>(src);

     /// First, insert typed paths, they must be the same for both columns.
-    for (auto & [path, column] : src_object_column.typed_paths)
+    for (const auto & [path, column] : src_object_column.typed_paths)
         typed_paths[path]->insertFrom(*column, n);

     /// Second, insert dynamic paths and extend them if needed.
@@ -428,7 +428,7 @@ void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t l
     const auto & src_object_column = assert_cast<const ColumnObject &>(src);

     /// First, insert typed paths, they must be the same for both columns.
-    for (auto & [path, column] : src_object_column.typed_paths)
+    for (const auto & [path, column] : src_object_column.typed_paths)
         typed_paths[path]->insertRangeFrom(*column, start, length);

     /// Second, insert dynamic paths and extend them if needed.
@@ -898,9 +898,9 @@ void ColumnObject::ensureOwnership()
 size_t ColumnObject::byteSize() const
 {
     size_t size = 0;
-    for (auto & [_, column] : typed_paths)
+    for (const auto & [_, column] : typed_paths)
         size += column->byteSize();
-    for (auto & [_, column] : dynamic_paths)
+    for (const auto & [_, column] : dynamic_paths)
         size += column->byteSize();
     size += shared_data->byteSize();
     return size;
@@ -909,9 +909,9 @@ size_t ColumnObject::byteSize() const
 size_t ColumnObject::byteSizeAt(size_t n) const
 {
     size_t size = 0;
-    for (auto & [_, column] : typed_paths)
+    for (const auto & [_, column] : typed_paths)
         size += column->byteSizeAt(n);
-    for (auto & [_, column] : dynamic_paths)
+    for (const auto & [_, column] : dynamic_paths)
         size += column->byteSizeAt(n);
     size += shared_data->byteSizeAt(n);
     return size;
@@ -920,9 +920,9 @@ size_t ColumnObject::byteSizeAt(size_t n) const
 size_t ColumnObject::allocatedBytes() const
 {
     size_t size = 0;
-    for (auto & [_, column] : typed_paths)
+    for (const auto & [_, column] : typed_paths)
         size += column->allocatedBytes();
-    for (auto & [_, column] : dynamic_paths)
+    for (const auto & [_, column] : dynamic_paths)
         size += column->allocatedBytes();
     size += shared_data->allocatedBytes();
     return size;
@@ -1040,9 +1040,9 @@ void ColumnObject::finalize()
 bool ColumnObject::isFinalized() const
 {
     bool finalized = true;
-    for (auto & [_, column] : typed_paths)
+    for (const auto & [_, column] : typed_paths)
         finalized &= column->isFinalized();
-    for (auto & [_, column] : dynamic_paths)
+    for (const auto & [_, column] : dynamic_paths)
         finalized &= column->isFinalized();
     finalized &= shared_data->isFinalized();
     return finalized;
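All of the loops above get the same one-token change: iterating map entries through `const auto &` structured bindings spells out the read-only intent, which is what the const-correctness checks in special builds expect. A self-contained sketch of the pattern (stand-in types; `byteSize()` is approximated with `sizeof`):

    #include <cstddef>
    #include <map>
    #include <memory>
    #include <string>

    using ColumnPtr = std::shared_ptr<int>;   // stand-in for a real column pointer

    size_t totalByteSize(const std::map<std::string, ColumnPtr> & typed_paths)
    {
        size_t size = 0;
        /// `const auto &` binds each entry read-only and never copies the pair.
        for (const auto & [path, column] : typed_paths)
            size += sizeof(*column);   // placeholder for column->byteSize()
        return size;
    }

    int main()
    {
        std::map<std::string, ColumnPtr> paths{{"a.b", std::make_shared<int>(1)}};
        return totalByteSize(paths) == sizeof(int) ? 0 : 1;
    }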
@@ -1144,8 +1144,8 @@ size_t ColumnObject::findPathLowerBoundInSharedData(StringRef path, const Column

         Iterator() = delete;
         Iterator(const ColumnString * data_, size_t index_) : data(data_), index(index_) {}
-        Iterator(const Iterator & rhs) : data(rhs.data), index(rhs.index) {}
-        Iterator & operator=(const Iterator & rhs) { data = rhs.data; index = rhs.index; return *this; }
+        Iterator(const Iterator & rhs) = default;
+        Iterator & operator=(const Iterator & rhs) = default;
         inline Iterator& operator+=(difference_type rhs) { index += rhs; return *this;}
         inline StringRef operator*() const {return data->getDataAt(index);}

@@ -188,7 +188,7 @@ public:
     static void fillPathColumnFromSharedData(IColumn & path_column, StringRef path, const ColumnPtr & shared_data_column, size_t start, size_t end);

 private:
-    void insertFromSharedDataAndFillRemainingDynamicPaths(const ColumnObject & src_object_column, std::vector<String> & dynamic_paths_to_shared_data, size_t start, size_t length);
+    void insertFromSharedDataAndFillRemainingDynamicPaths(const ColumnObject & src_object_column, std::vector<String> & src_dynamic_paths_for_shared_data, size_t start, size_t length);
     void serializePathAndValueIntoArena(Arena & arena, const char *& begin, StringRef path, StringRef value, StringRef & res) const;

     /// Map path -> column for paths with explicitly specified types.
@@ -196,10 +196,29 @@ MutableColumnPtr DataTypeObject::createColumn() const
 namespace
 {

+/// It is possible to have nested JSON object inside Dynamic. For example when we have an array of JSON objects.
+/// During type inference in parsing in case of creating nested JSON objects, we reduce max_dynamic_paths/max_dynamic_types by factors
+/// NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR/NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR.
+/// So the type name will actually be JSON(max_dynamic_paths=N, max_dynamic_types=M). But we want the user to be able to query it
+/// using json.array.:`Array(JSON)`.some.path without specifying max_dynamic_paths/max_dynamic_types.
+/// To support it, we do a trick - we replace JSON name in subcolumn to JSON(max_dynamic_paths=N, max_dynamic_types=M), because we know
+/// the exact values of max_dynamic_paths/max_dynamic_types for it.
+void replaceJSONTypeNameIfNeeded(String & type_name, size_t max_dynamic_paths, size_t max_dynamic_types)
+{
+    auto pos = type_name.find("JSON");
+    while (pos != String::npos)
+    {
+        /// Replace only if we don't already have parameters in JSON type declaration.
+        if (pos + 4 == type_name.size() || type_name[pos + 4] != '(')
+            type_name.replace(pos, 4, fmt::format("JSON(max_dynamic_paths={}, max_dynamic_types={})", max_dynamic_paths / DataTypeObject::NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR, std::max(max_dynamic_types / DataTypeObject::NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR, 1lu)));
+        pos = type_name.find("JSON", pos + 4);
+    }
+}
+
 /// JSON subcolumn name with Dynamic type subcolumn looks like this:
 /// "json.some.path.:`Type_name`.some.subcolumn".
 /// We back quoted type name during identifier parsing so we can distinguish type subcolumn and path element ":TypeName".
-std::pair<String, String> splitPathAndDynamicTypeSubcolumn(std::string_view subcolumn_name)
+std::pair<String, String> splitPathAndDynamicTypeSubcolumn(std::string_view subcolumn_name, size_t max_dynamic_paths, size_t max_dynamic_types)
 {
     /// Try to find dynamic type subcolumn in a form .:`Type`.
     auto pos = subcolumn_name.find(".:`");
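The rewrite above is plain string surgery over the type name. A minimal re-implementation using std::string in place of ClickHouse's String and fmt::format, with the reduce factors hard-coded to the values DataTypeObject declares (4 for paths, 2 for types):

    #include <algorithm>
    #include <cassert>
    #include <string>

    void replaceJSONTypeName(std::string & type_name, size_t max_paths, size_t max_types)
    {
        const std::string params = "JSON(max_dynamic_paths=" + std::to_string(max_paths / 4)
            + ", max_dynamic_types=" + std::to_string(std::max<size_t>(max_types / 2, 1)) + ")";
        auto pos = type_name.find("JSON");
        while (pos != std::string::npos)
        {
            /// Replace only a bare JSON without an explicit parameter list.
            if (pos + 4 == type_name.size() || type_name[pos + 4] != '(')
                type_name.replace(pos, 4, params);
            pos = type_name.find("JSON", pos + 4);
        }
    }

    int main()
    {
        std::string name = "Array(JSON)";
        replaceJSONTypeName(name, 1024, 32);
        assert(name == "Array(JSON(max_dynamic_paths=256, max_dynamic_types=16))");
    }

With this in place, a subcolumn such as json.array.:`Array(JSON)`.some.path resolves even though the stored nested type is really the parameterized JSON(max_dynamic_paths=N, max_dynamic_types=M).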
@@ -212,6 +231,8 @@ std::pair<String, String> splitPathAndDynamicTypeSubcolumn(std::string_view subc
     if (!tryReadBackQuotedString(dynamic_subcolumn, buf))
         return {String(subcolumn_name), ""};

+    replaceJSONTypeNameIfNeeded(dynamic_subcolumn, max_dynamic_paths, max_dynamic_types);
+
     /// If there is more data in the buffer - it's subcolumn of a type, append it to the type name.
     if (!buf.eof())
         dynamic_subcolumn += String(buf.position(), buf.available());
@@ -333,7 +354,7 @@ std::unique_ptr<ISerialization::SubstreamData> DataTypeObject::getDynamicSubcolu
     }

     /// Split requested subcolumn to the JSON path and Dynamic type subcolumn.
-    auto [path, path_subcolumn] = splitPathAndDynamicTypeSubcolumn(subcolumn_name);
+    auto [path, path_subcolumn] = splitPathAndDynamicTypeSubcolumn(subcolumn_name, max_dynamic_paths, max_dynamic_types);
     std::unique_ptr<SubstreamData> res;
     if (auto it = typed_paths.find(path); it != typed_paths.end())
     {
@@ -373,18 +394,6 @@ std::unique_ptr<ISerialization::SubstreamData> DataTypeObject::getDynamicSubcolu
     /// Get subcolumn for Dynamic type if needed.
     if (!path_subcolumn.empty())
     {
-        /// It is possible to have nested JSON object inside Dynamic. For example when we have an array of JSON objects.
-        /// During parsing in case of creating nested JSON objects, we reduce max_dynamic_paths/max_dynamic_types by NESTED_OBJECT_REDUCE_FACTOR factor.
-        /// So the type name will actually be JSON(max_dynamic_paths=N, max_dynamic_types=M). But we want the user to be able to query it
-        /// using json.array.:`Array(JSON)`.some.path without specifying max_dynamic_paths/max_dynamic_types.
-        /// To support it, we do a trick - we replace JSON name in subcolumn to JSON(max_dynamic_paths=N, max_dynamic_types=M), because we know
-        /// the exact values of max_dynamic_paths/max_dynamic_types for it.
-        auto pos = path_subcolumn.find("JSON");
-        /// We want to replace JSON keyword only in the first subcolumn part before the first dot.
-        auto first_dot_pos = path_subcolumn.find('.');
-        if (pos != path_subcolumn.npos && (first_dot_pos == path_subcolumn.npos || pos < first_dot_pos))
-            path_subcolumn.replace(pos, 4, fmt::format("JSON(max_dynamic_paths={}, max_dynamic_types={})", max_dynamic_paths / NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR, std::max(max_dynamic_types / NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR, 1lu)));
-
         res = res->type->getSubcolumnData(path_subcolumn, *res, throw_if_null);
         if (!res)
             return nullptr;
@@ -23,7 +23,7 @@ public:
     static constexpr size_t NESTED_OBJECT_MAX_DYNAMIC_PATHS_REDUCE_FACTOR = 4;
     static constexpr size_t NESTED_OBJECT_MAX_DYNAMIC_TYPES_REDUCE_FACTOR = 2;

-    DataTypeObject(
+    explicit DataTypeObject(
         const SchemaFormat & schema_format_,
         const std::unordered_map<String, DataTypePtr> & typed_paths_ = {},
         const std::unordered_set<String> & paths_to_skip_ = {},
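`explicit` matters here because everything after `schema_format_` is defaulted, so the constructor is callable with a single argument and would otherwise act as an implicit conversion from SchemaFormat. A sketch with stand-in names, not the real ClickHouse declarations:

    #include <string>
    #include <unordered_map>
    #include <utility>

    enum class SchemaFormat { JSON };

    struct DataTypeObject
    {
        explicit DataTypeObject(SchemaFormat schema_format_, std::unordered_map<std::string, int> typed_paths_ = {})
            : schema_format(schema_format_), typed_paths(std::move(typed_paths_)) {}

        SchemaFormat schema_format;
        std::unordered_map<std::string, int> typed_paths;
    };

    void takeType(const DataTypeObject &) {}

    int main()
    {
        takeType(DataTypeObject(SchemaFormat::JSON));   // fine: explicit construction
        // takeType(SchemaFormat::JSON);                // compiled before `explicit`; now rejected
    }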
@@ -31,7 +31,7 @@ public:
     void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

     void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
-    virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
+    void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t indent) const override;
     void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

     void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
@@ -48,7 +48,7 @@ bool SerializationObject::shouldSkipPath(const String & path) const
     if (paths_to_skip.contains(path))
         return true;

-    auto it = std::lower_bound(sorted_typed_paths.begin(), sorted_typed_paths.end(), path);
+    auto it = std::lower_bound(sorted_paths_to_skip.begin(), sorted_paths_to_skip.end(), path);
     if (it != sorted_paths_to_skip.end() && it != sorted_paths_to_skip.begin() && path.starts_with(*std::prev(it)))
         return true;

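The shouldSkipPath change is a genuine bug fix: the lower_bound scanned `sorted_typed_paths` while the neighbour checks used `sorted_paths_to_skip`. The underlying idiom is that in a lexicographically sorted prefix list, the only candidate that can prefix `path` sits immediately before `lower_bound(path)`. A slightly simplified standalone version:

    #include <algorithm>
    #include <cassert>
    #include <string>
    #include <vector>

    bool shouldSkipPath(const std::vector<std::string> & sorted_paths_to_skip, const std::string & path)
    {
        auto it = std::lower_bound(sorted_paths_to_skip.begin(), sorted_paths_to_skip.end(), path);
        if (it != sorted_paths_to_skip.end() && *it == path)
            return true;   /// exact match
        /// Only the element just before the insertion point can be a prefix of `path`.
        if (it != sorted_paths_to_skip.begin() && path.starts_with(*std::prev(it)))
            return true;
        return false;
    }

    int main()
    {
        const std::vector<std::string> skip{"a.b", "meta"};
        assert(shouldSkipPath(skip, "a.b"));     // exact
        assert(shouldSkipPath(skip, "a.b.c"));   // covered by the "a.b" prefix
        assert(!shouldSkipPath(skip, "a.c"));    // lower_bound lands on "meta"; "a.b" is not a prefix
    }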
@@ -101,7 +101,7 @@ private:
     {
         String path;

-        TypedPathSubcolumnCreator(const String & path_) : path(path_) {}
+        explicit TypedPathSubcolumnCreator(const String & path_) : path(path_) {}

         DataTypePtr create(const DataTypePtr & prev) const override { return prev; }
         ColumnPtr create(const ColumnPtr & prev) const override { return prev; }
@@ -67,7 +67,6 @@ private:
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Text/binary serialization is not implemented for object sub-object subcolumn");
     }

-private:
     String path_prefix;
     std::unordered_map<String, SerializationPtr> typed_paths_serializations;
     SerializationPtr dynamic_serialization;
@@ -1389,7 +1389,7 @@ template <typename JSONParser>
 class DynamicNode : public JSONExtractTreeNode<JSONParser>
 {
 public:
-    DynamicNode(
+    explicit DynamicNode(
         size_t max_dynamic_paths_for_object_ = DataTypeObject::DEFAULT_MAX_SEPARATELY_STORED_PATHS,
         size_t max_dynamic_types_for_object_ = DataTypeDynamic::DEFAULT_MAX_DYNAMIC_TYPES)
         : max_dynamic_paths_for_object(max_dynamic_paths_for_object_), max_dynamic_types_for_object(max_dynamic_types_for_object_)
@@ -1412,7 +1412,7 @@ public:
     }

     auto & variant_column = column_dynamic.getVariantColumn();
-    auto & variant_info = column_dynamic.getVariantInfo();
+    const auto & variant_info = column_dynamic.getVariantInfo();

     /// First, try to insert element into current variants but with no types conversion.
     /// We want to avoid inferring the type on each row, so if we can insert this element into
@@ -1486,20 +1486,20 @@ private:
         switch (element.type())
         {
             case ElementType::NULL_VALUE:
-                return getNullType();
+                return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
             case ElementType::BOOL:
-                return getBoolType();
+                return DataTypeFactory::instance().get("Bool");
             case ElementType::INT64:
             {
-                auto type = getInt64Type();
+                auto type = std::make_shared<DataTypeInt64>();
                 if (element.getInt64() < 0)
                     json_inference_info.negative_integers.insert(type.get());
                 return type;
             }
             case ElementType::UINT64:
-                return getUInt64Type();
+                return std::make_shared<DataTypeUInt64>();
             case ElementType::DOUBLE:
-                return getFloat64Type();
+                return std::make_shared<DataTypeFloat64>();
             case ElementType::STRING:
             {
                 auto data = element.getString();
@@ -1516,7 +1516,7 @@ private:
                     }
                 }

-                return getStringType();
+                return std::make_shared<DataTypeString>();
             }
             case ElementType::ARRAY:
             {
@@ -1527,7 +1527,7 @@ private:
                     types.push_back(elementToDataTypeImpl(value, format_settings, json_inference_info));

                 if (types.empty())
-                    return getEmptyArrayType();
+                    return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());

                 if (checkIfTypesAreEqual(types))
                     return std::make_shared<DataTypeArray>(types.back());
@@ -1561,51 +1561,6 @@ private:
         }
     }

-    /// During schema inference we create shared_ptr to the some data types quite a lot.
-    /// Single creating of such shared_ptr is not expensive, but when it happens on each
-    /// column on each row, it can be noticeable.
-    const DataTypePtr & getBoolType() const
-    {
-        static const DataTypePtr bool_type = DataTypeFactory::instance().get("Bool");
-        return bool_type;
-    }
-
-    const DataTypePtr & getStringType() const
-    {
-        static const DataTypePtr string_type = std::make_shared<DataTypeString>();
-        return string_type;
-    }
-
-    const DataTypePtr & getInt64Type() const
-    {
-        static const DataTypePtr int64_type = std::make_shared<DataTypeInt64>();
-        return int64_type;
-    }
-
-    const DataTypePtr & getUInt64Type() const
-    {
-        static const DataTypePtr uint64_type = std::make_shared<DataTypeUInt64>();
-        return uint64_type;
-    }
-
-    const DataTypePtr & getFloat64Type() const
-    {
-        static const DataTypePtr float64_type = std::make_shared<DataTypeFloat64>();
-        return float64_type;
-    }
-
-    const DataTypePtr & getNullType() const
-    {
-        static const DataTypePtr null_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
-        return null_type;
-    }
-
-    const DataTypePtr & getEmptyArrayType() const
-    {
-        static const DataTypePtr empty_array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());
-        return empty_array_type;
-    }
-
     size_t max_dynamic_paths_for_object;
     size_t max_dynamic_types_for_object;

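One plausible reason these per-instance caches had to go is visible in the hunks themselves: the inference code keys per-value metadata on the raw type pointer (`json_inference_info.negative_integers.insert(type.get())`). With a cached singleton, every Int64 inferred anywhere shares one pointer, so marking one occurrence as negative would mark them all; fresh `make_shared` instances keep the bookkeeping per occurrence. A sketch of the hazard with stand-in types:

    #include <cassert>
    #include <memory>
    #include <unordered_set>

    struct DataTypeInt64 {};   // stand-in for the real data type node
    using DataTypePtr = std::shared_ptr<DataTypeInt64>;

    int main()
    {
        std::unordered_set<const DataTypeInt64 *> negative_integers;

        /// Distinct instances: the "negative" mark stays local to one inference site.
        DataTypePtr a = std::make_shared<DataTypeInt64>();   // inferred from "-5"
        DataTypePtr b = std::make_shared<DataTypeInt64>();   // inferred from "7"
        negative_integers.insert(a.get());

        assert(negative_integers.contains(a.get()));
        assert(!negative_integers.contains(b.get()));   // would fail if a and b were one cached singleton
    }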
@@ -1772,7 +1727,7 @@ private:
             }
         }
         /// Try to add a new dynamic path.
-        else if (auto dynamic_column = column_object.tryToAddNewDynamicPath(current_path))
+        else if (auto * dynamic_column = column_object.tryToAddNewDynamicPath(current_path))
         {
             if (!dynamic_node->insertResultToColumn(*dynamic_column, element, insert_settings, format_settings, error))
             {
@@ -36,63 +36,6 @@ namespace ErrorCodes

 namespace
 {
-/// During schema inference we create shared_ptr to the some data types quite a lot.
-/// Single creating of such shared_ptr is not expensive, but when it happens on each
-/// column on each row, it can be noticeable.
-const DataTypePtr & getBoolType()
-{
-    static const DataTypePtr bool_type = DataTypeFactory::instance().get("Bool");
-    return bool_type;
-}
-
-const DataTypePtr & getStringType()
-{
-    static const DataTypePtr string_type = std::make_shared<DataTypeString>();
-    return string_type;
-}
-
-const DataTypePtr & getInt64Type()
-{
-    static const DataTypePtr int64_type = std::make_shared<DataTypeInt64>();
-    return int64_type;
-}
-
-const DataTypePtr & getUInt64Type()
-{
-    static const DataTypePtr uint64_type = std::make_shared<DataTypeUInt64>();
-    return uint64_type;
-}
-
-const DataTypePtr & getFloat64Type()
-{
-    static const DataTypePtr float64_type = std::make_shared<DataTypeFloat64>();
-    return float64_type;
-}
-
-const DataTypePtr & getDateType()
-{
-    static const DataTypePtr date_type = std::make_shared<DataTypeDate>();
-    return date_type;
-}
-
-const DataTypePtr & getDateTime64Type()
-{
-    static const DataTypePtr date_type = std::make_shared<DataTypeDateTime64>(9);
-    return date_type;
-}
-
-const DataTypePtr & getNullType()
-{
-    static const DataTypePtr null_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
-    return null_type;
-}
-
-const DataTypePtr & getEmptyArrayType()
-{
-    static const DataTypePtr empty_array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());
-    return empty_array_type;
-}
-
 /// Special data type that represents JSON object as a set of paths and their types.
 /// It supports merging two JSON objects and creating Named Tuple from itself.
 /// It's used only for schema inference of Named Tuples from JSON objects.
@@ -265,7 +208,7 @@ namespace
         if (leaf_type && !isNothing(removeNullable(leaf_type)) && !nodes.empty())
         {
             if (use_string_type_for_ambiguous_paths)
-                return getStringType();
+                return std::make_shared<DataTypeString>();

             throw Exception(
                 ErrorCodes::INCORRECT_DATA,
@@ -331,7 +274,7 @@ namespace
             bool is_negative = json_info && json_info->negative_integers.contains(type.get());
             have_negative_integers |= is_negative;
             if (!is_negative)
-                type = getUInt64Type();
+                type = std::make_shared<DataTypeUInt64>();
         }
     }

@@ -352,7 +295,7 @@ namespace
             WhichDataType which(type);
             if (which.isInt64() || which.isUInt64())
             {
-                auto new_type = getFloat64Type();
+                const auto & new_type = std::make_shared<DataTypeFloat64>();
                 if (json_info && json_info->numbers_parsed_from_json_strings.erase(type.get()))
                     json_info->numbers_parsed_from_json_strings.insert(new_type.get());
                 type = new_type;
@@ -376,7 +319,7 @@ namespace
         for (auto & type : data_types)
         {
             if (isDate(type) || isDateTime64(type))
-                type = getStringType();
+                type = std::make_shared<DataTypeString>();
         }

         type_indexes.erase(TypeIndex::Date);
@@ -390,7 +333,7 @@ namespace
         for (auto & type : data_types)
         {
             if (isDate(type))
-                type = getDateTime64Type();
+                type = std::make_shared<DataTypeDateTime64>(9);
         }

         type_indexes.erase(TypeIndex::Date);
@@ -412,7 +355,7 @@ namespace
             if (isNumber(type)
                 && (settings.json.read_numbers_as_strings || !json_info
                     || json_info->numbers_parsed_from_json_strings.contains(type.get())))
-                type = getStringType();
+                type = std::make_shared<DataTypeString>();
         }

         updateTypeIndexes(data_types, type_indexes);
@@ -435,11 +378,11 @@ namespace
             if (isBool(type))
             {
                 if (have_signed_integers)
-                    type = getInt64Type();
+                    type = std::make_shared<DataTypeInt64>();
                 else if (have_unsigned_integers)
-                    type = getUInt64Type();
+                    type = std::make_shared<DataTypeUInt64>();
                 else
-                    type = getFloat64Type();
+                    type = std::make_shared<DataTypeFloat64>();
             }
         }

@@ -456,7 +399,7 @@ namespace
         for (auto & type : data_types)
        {
             if (isBool(type))
-                type = getStringType();
+                type = std::make_shared<DataTypeString>();
         }

         type_indexes.erase(TypeIndex::UInt8);
@@ -606,7 +549,7 @@ namespace
         for (auto & type : data_types)
         {
             if (isMap(type))
-                type = getStringType();
+                type = std::make_shared<DataTypeString>();
         }

         type_indexes.erase(TypeIndex::Map);
@@ -856,7 +799,7 @@ namespace

         /// Empty array has type Array(Nothing)
         if (nested_types.empty())
-            return getEmptyArrayType();
+            return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNothing>());

         if (checkIfTypesAreEqual(nested_types))
             return std::make_shared<DataTypeArray>(std::move(nested_types.back()));
@@ -969,13 +912,13 @@ namespace
         /// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e'
         /// But, for now it is true
         if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional) && has_fractional)
-            return getFloat64Type();
+            return std::make_shared<DataTypeFloat64>();

         Int64 tmp_int;
         buf.position() = number_start;
         if (tryReadIntText(tmp_int, buf))
         {
-            auto type = getInt64Type();
+            auto type = std::make_shared<DataTypeInt64>();
             if (json_info && tmp_int < 0)
                 json_info->negative_integers.insert(type.get());
             return type;
@@ -985,7 +928,7 @@ namespace
         UInt64 tmp_uint;
         buf.position() = number_start;
         if (tryReadIntText(tmp_uint, buf))
-            return getUInt64Type();
+            return std::make_shared<DataTypeUInt64>();

         return nullptr;
     }
@@ -997,13 +940,13 @@ namespace
         PeekableReadBufferCheckpoint checkpoint(peekable_buf);

         if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings, has_fractional) && has_fractional)
-            return getFloat64Type();
+            return std::make_shared<DataTypeFloat64>();
         peekable_buf.rollbackToCheckpoint(/* drop= */ false);

         Int64 tmp_int;
         if (tryReadIntText(tmp_int, peekable_buf))
         {
-            auto type = getInt64Type();
+            auto type = std::make_shared<DataTypeInt64>();
             if (json_info && tmp_int < 0)
                 json_info->negative_integers.insert(type.get());
             return type;
@@ -1013,11 +956,11 @@ namespace
             /// In case of Int64 overflow we can try to infer UInt64.
             UInt64 tmp_uint;
             if (tryReadIntText(tmp_uint, peekable_buf))
-                return getUInt64Type();
+                return std::make_shared<DataTypeUInt64>();
         }
         else if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional))
         {
-            return getFloat64Type();
+            return std::make_shared<DataTypeFloat64>();
         }

         /// This is not a number.
@@ -1034,7 +977,7 @@ namespace
         Int64 tmp_int;
         if (tryReadIntText(tmp_int, buf) && buf.eof())
         {
-            auto type = getInt64Type();
+            auto type = std::make_shared<DataTypeInt64>();
             if (json_inference_info && tmp_int < 0)
                 json_inference_info->negative_integers.insert(type.get());
             return type;
@@ -1046,7 +989,7 @@ namespace
         /// In case of Int64 overflow, try to infer UInt64
         UInt64 tmp_uint;
         if (tryReadIntText(tmp_uint, buf) && buf.eof())
-            return getUInt64Type();
+            return std::make_shared<DataTypeUInt64>();
     }

     /// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
@@ -1055,7 +998,7 @@ namespace
     Float64 tmp;
     bool has_fractional;
     if (tryReadFloat<is_json>(tmp, buf, settings, has_fractional) && buf.eof())
-        return getFloat64Type();
+        return std::make_shared<DataTypeFloat64>();

     return nullptr;
 }
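Across the number-parsing hunks the fallback order is unchanged, only the cached types are gone: a value with a fractional part becomes Float64, otherwise Int64 is preferred, and UInt64 is the fallback when the literal overflows Int64. A standalone approximation of that outcome, using std::from_chars full-match checks instead of ClickHouse's read-buffer rollback:

    #include <charconv>
    #include <cstdint>
    #include <optional>
    #include <string_view>

    enum class InferredType { Int64, UInt64, Float64 };

    std::optional<InferredType> inferNumberType(std::string_view s)
    {
        const char * end = s.data() + s.size();

        int64_t i = 0;
        if (auto [p, ec] = std::from_chars(s.data(), end, i); ec == std::errc() && p == end)
            return InferredType::Int64;

        uint64_t u = 0;   /// Int64 overflow case, e.g. 18446744073709551615
        if (auto [p, ec] = std::from_chars(s.data(), end, u); ec == std::errc() && p == end)
            return InferredType::UInt64;

        double d = 0;     /// fractional or exponent form
        if (auto [p, ec] = std::from_chars(s.data(), end, d); ec == std::errc() && p == end)
            return InferredType::Float64;

        return std::nullopt;   /// not a number
    }

    int main()
    {
        return (inferNumberType("-5") == InferredType::Int64
            && inferNumberType("18446744073709551615") == InferredType::UInt64
            && inferNumberType("1.5") == InferredType::Float64
            && !inferNumberType("abc")) ? 0 : 1;
    }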
@@ -1079,7 +1022,7 @@ namespace
     if constexpr (is_json)
     {
         if (json_info->is_object_key)
-            return getStringType();
+            return std::make_shared<DataTypeString>();
     }

     if (auto type = tryInferDateOrDateTimeFromString(field, settings))
@@ -1097,7 +1040,7 @@ namespace
         }
     }

-    return getStringType();
+    return std::make_shared<DataTypeString>();
 }

 bool tryReadJSONObject(ReadBuffer & buf, const FormatSettings & settings, DataTypeJSONPaths::Paths & paths, const std::vector<String> & path, JSONInferenceInfo * json_info, size_t depth)
@@ -1254,7 +1197,7 @@ namespace
             return std::make_shared<DataTypeObjectDeprecated>("json", true);

         if (settings.json.read_objects_as_strings)
-            return getStringType();
+            return std::make_shared<DataTypeString>();

         transformInferredTypesIfNeededImpl<is_json>(value_types, settings, json_info);
         if (!checkIfTypesAreEqual(value_types))
@@ -1320,15 +1263,15 @@ namespace

     /// Bool
     if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf))
-        return getBoolType();
+        return DataTypeFactory::instance().get("Bool");

     /// Null or NaN
     if (checkCharCaseInsensitive('n', buf))
     {
         if (checkStringCaseInsensitive("ull", buf))
-            return getNullType();
+            return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
         else if (checkStringCaseInsensitive("an", buf))
-            return getFloat64Type();
+            return std::make_shared<DataTypeFloat64>();
     }

     /// Number
@@ -1385,7 +1328,7 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F

     if (!remain_nothing_types && isNothing(data_type) && settings.json.infer_incomplete_types_as_strings)
     {
-        data_type = getStringType();
+        data_type = std::make_shared<DataTypeString>();
         return;
     }

@@ -1402,7 +1345,7 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F
         /// If all objects were empty, use type String, so these JSON objects will be read as Strings.
         if (json_paths->empty() && settings.json.infer_incomplete_types_as_strings)
         {
-            data_type = getStringType();
+            data_type = std::make_shared<DataTypeString>();
             return;
         }

@@ -1424,7 +1367,7 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F
         auto key_type = map_type->getKeyType();
         /// If all inferred Maps are empty, use type String, so these JSON objects will be read as Strings.
         if (isNothing(key_type) && settings.json.infer_incomplete_types_as_strings)
-            key_type = getStringType();
+            key_type = std::make_shared<DataTypeString>();

         auto value_type = map_type->getValueType();

@@ -1501,10 +1444,10 @@ DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSet
 DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings)
 {
     if (settings.try_infer_dates && tryInferDate(field))
-        return getDateType();
+        return std::make_shared<DataTypeDate>();

     if (settings.try_infer_datetimes && tryInferDateTime(field, settings))
-        return getDateTime64Type();
+        return std::make_shared<DataTypeDateTime64>(9);

     return nullptr;
 }
@@ -165,6 +165,7 @@ private:
         std::vector<String> sorted_dynamic_and_typed_paths;
         const auto & typed_path_columns = column_object.getTypedPaths();
         const auto & dynamic_path_columns = column_object.getDynamicPaths();
+        sorted_dynamic_and_typed_paths.reserve(typed_path_columns.size() + dynamic_path_columns.size());
        for (const auto & [path, _] : typed_path_columns)
             sorted_dynamic_and_typed_paths.push_back(path);
         for (const auto & [path, _] : dynamic_path_columns)
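The added line is the standard reserve-before-append: the final element count is known up front (typed plus dynamic paths), so one allocation replaces repeated vector growth. The shape of it, with stand-in map types:

    #include <string>
    #include <unordered_map>
    #include <vector>

    std::vector<std::string> collectPaths(
        const std::unordered_map<std::string, int> & typed_paths,
        const std::unordered_map<std::string, int> & dynamic_paths)
    {
        std::vector<std::string> sorted_dynamic_and_typed_paths;
        /// Single allocation: the final size is the sum of both map sizes.
        sorted_dynamic_and_typed_paths.reserve(typed_paths.size() + dynamic_paths.size());
        for (const auto & [path, _] : typed_paths)
            sorted_dynamic_and_typed_paths.push_back(path);
        for (const auto & [path, _] : dynamic_paths)
            sorted_dynamic_and_typed_paths.push_back(path);
        return sorted_dynamic_and_typed_paths;
    }

    int main()
    {
        return collectPaths({{"a", 1}}, {{"b", 2}}).size() == 2 ? 0 : 1;
    }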
@@ -68,6 +68,7 @@ WITH map(
         'Map', 'JSON',
         'Tuple', 'JSON',
         'Object', 'JSON',
+        'JSON', 'JSON',
         'String', '{}',
         'FixedString', '{}') AS native_to_mysql_mapping,
     )",
@@ -35,27 +35,27 @@ ASTPtr ASTObjectTypeArgument::clone() const
     return res;
 }

-void ASTObjectTypeArgument::formatImpl(const FormatSettings & parameters, FormatState & state, FormatStateStacked frame) const
+void ASTObjectTypeArgument::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
 {
     if (path_with_type)
     {
-        path_with_type->formatImpl(parameters, state, frame);
+        path_with_type->formatImpl(settings, state, frame);
     }
     else if (parameter)
     {
-        parameter->formatImpl(parameters, state, frame);
+        parameter->formatImpl(settings, state, frame);
     }
     else if (skip_path)
     {
-        std::string indent_str = parameters.one_line ? "" : std::string(4 * frame.indent, ' ');
-        parameters.ostr << indent_str << "SKIP" << ' ';
-        skip_path->formatImpl(parameters, state, frame);
+        std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
+        settings.ostr << indent_str << "SKIP" << ' ';
+        skip_path->formatImpl(settings, state, frame);
     }
     else if (skip_path_regexp)
     {
-        std::string indent_str = parameters.one_line ? "" : std::string(4 * frame.indent, ' ');
-        parameters.ostr << indent_str << "SKIP REGEXP" << ' ';
-        skip_path_regexp->formatImpl(parameters, state, frame);
+        std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' ');
+        settings.ostr << indent_str << "SKIP REGEXP" << ' ';
+        skip_path_regexp->formatImpl(settings, state, frame);
     }
 }

@@ -213,6 +213,9 @@ void MergeTreeReaderWide::addStreams(

     ISerialization::StreamCallback callback = [&] (const ISerialization::SubstreamPath & substream_path)
     {
+        if (ISerialization::isFictitiousSubcolumn(substream_path, substream_path.size()))
+            return;
+
         auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums());

         /** If data file is missing then we will not try to open it.
@@ -348,6 +351,9 @@ void MergeTreeReaderWide::prefetchForColumn(
     deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache);
     auto callback = [&](const ISerialization::SubstreamPath & substream_path)
     {
+        if (ISerialization::isFictitiousSubcolumn(substream_path, substream_path.size()))
+            return;
+
         auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums());

         if (stream_name && !prefetched_streams.contains(*stream_name))
@@ -211,6 +211,9 @@ static IMergeTreeDataPart::Checksums checkDataPart(
     {
         get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path)
         {
+            if (ISerialization::isFictitiousSubcolumn(substream_path, substream_path.size()))
+                return;
+
             auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(column, substream_path, ".bin", data_part_storage);

             if (!stream_name)
@@ -1,9 +1,8 @@
 ('a.b','Int64')
-('a.c','Array(JSON(max_dynamic_types=16, max_dynamic_paths=256))')
+('a.c','Array(Nullable(String))')
 ('d','Int64')
 ('e','Array(Nullable(Int64))')
 ('f','Int64')
-('d','Int64')
 {"o":{"a":{"b":"1","c":[{"d":"10","e":["31"]},{"d":"20","e":["63","127"]}]}}}
 {"o":{"a":{"b":"2","c":[]}}}
 {"o":{"a":{"b":"3","c":[{"e":["32"],"f":"20"},{"e":["64","128"],"f":"30"}]}}}
@@ -8,8 +8,8 @@ CREATE TABLE t_json_10 (o JSON) ENGINE = Memory;
 INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 1, "c": [{"d": 10, "e": [31]}, {"d": 20, "e": [63, 127]}]}} {"a": {"b": 2, "c": []}}
 INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 3, "c": [{"f": 20, "e": [32]}, {"f": 30, "e": [64, 128]}]}} {"a": {"b": 4, "c": []}}

-SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(o)) FROM t_json_10;
-SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(o.a.c.:`Array(JSON)`))) FROM t_json_10;
+SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(o)) as path FROM t_json_10 order by path;
+SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(o.a.c.:`Array(JSON)`))) as path FROM t_json_10 order by path;
 SELECT o FROM t_json_10 ORDER BY o.a.b FORMAT JSONEachRow;
 SELECT o.a.b, o.a.c.:`Array(JSON)`.d, o.a.c.:`Array(JSON)`.e, o.a.c.:`Array(JSON)`.f FROM t_json_10 ORDER BY o.a.b;
@@ -53,10 +53,10 @@ cat <<EOF | $CLICKHOUSE_CLIENT -q "INSERT INTO t_json_11 FORMAT JSONAsObject"
 }
 EOF

-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) FROM t_json_11;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key_1[]))) FROM t_json_11;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(obj.key_1[].key_3[])))) FROM t_json_11;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(arrayJoin(obj.key_1[].key_3[].key_4[]))))) FROM t_json_11;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) as path FROM t_json_11 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key_1[]))) as path FROM t_json_11 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(obj.key_1[].key_3[])))) as path FROM t_json_11 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(arrayJoin(obj.key_1[].key_3[].key_4[]))))) as path FROM t_json_11 order by path;"
 $CLICKHOUSE_CLIENT -q "SELECT obj FROM t_json_11 ORDER BY obj.id FORMAT JSONEachRow"
 $CLICKHOUSE_CLIENT -q "SELECT obj.key_1[].key_3 FROM t_json_11 ORDER BY obj.id FORMAT JSONEachRow"
 $CLICKHOUSE_CLIENT -q "SELECT obj.key_1[].key_3[].key_4[].key_5, obj.key_1[].key_3[].key_7 FROM t_json_11 ORDER BY obj.id"
@@ -43,10 +43,10 @@ cat <<EOF | $CLICKHOUSE_CLIENT -q "INSERT INTO t_json_12 FORMAT JSONAsObject"
 }
 EOF

-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) FROM t_json_12;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key_0[]))) FROM t_json_12;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(obj.key_0[].key_1[])))) FROM t_json_12;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(arrayJoin(obj.key_0[].key_1[].key_3[]))))) FROM t_json_12;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) as path FROM t_json_12 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key_0[]))) as path FROM t_json_12 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(obj.key_0[].key_1[])))) as path FROM t_json_12 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(arrayJoin(obj.key_0[].key_1[].key_3[]))))) as path FROM t_json_12 order by path;"
 $CLICKHOUSE_CLIENT -q "SELECT obj FROM t_json_12 ORDER BY obj.id FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1
 $CLICKHOUSE_CLIENT -q "SELECT obj.key_0[].key_1[].key_3[].key_4, obj.key_0[].key_1[].key_3[].key_5, \
     obj.key_0[].key_1[].key_3[].key_6, obj.key_0[].key_1[].key_3[].key_7 FROM t_json_12 ORDER BY obj.id"
@@ -36,8 +36,8 @@ cat <<EOF | $CLICKHOUSE_CLIENT -q "INSERT INTO t_json_13 FORMAT JSONAsObject"
 }
 EOF

-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) FROM t_json_13;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key1[]))) FROM t_json_13;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(obj)) as path FROM t_json_13 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(obj.key1[]))) as path FROM t_json_13 order by path;"

 $CLICKHOUSE_CLIENT -q "SELECT obj FROM t_json_13 ORDER BY obj.id FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1
 $CLICKHOUSE_CLIENT -q "SELECT \
@@ -51,9 +51,9 @@ cat <<EOF | $CLICKHOUSE_CLIENT -q "INSERT INTO t_json_6 FORMAT JSONAsObject"
 }
 EOF

-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) FROM t_json_6;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(data.out[]))) FROM t_json_6;"
-$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(data.out[].outputs[])))) FROM t_json_6;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) as path FROM t_json_6 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(data.out[]))) as path FROM t_json_6 order by path;"
+$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(data.out[].outputs[])))) as path FROM t_json_6 order by path;"
 $CLICKHOUSE_CLIENT -q "SELECT data.key, data.out[].type, data.out[].value, data.out[].outputs[].index, data.out[].outputs[].n FROM t_json_6 ORDER BY data.key"

 $CLICKHOUSE_CLIENT -q "DROP TABLE t_json_6;"
@@ -16,9 +16,9 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE btc (data JSON) ENGINE = MergeTree ORDER B
 ${CLICKHOUSE_CLIENT} -q "INSERT INTO btc SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/btc_transactions.json', 'JSONAsObject')"

 ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM btc WHERE NOT ignore(*)"
-${CLICKHOUSE_CLIENT} -q "SELECT distinct arrayJoin(JSONAllPathsWithTypes(data)) from btc"
-${CLICKHOUSE_CLIENT} -q "SELECT distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(data.inputs.:\`Array(JSON)\`))) from btc"
-${CLICKHOUSE_CLIENT} -q "SELECT distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(data.inputs.:\`Array(JSON)\`.prev_out.spending_outpoints.:\`Array(JSON)\`)))) from btc"
+${CLICKHOUSE_CLIENT} -q "SELECT distinct arrayJoin(JSONAllPathsWithTypes(data)) as path from btc order by path"
+${CLICKHOUSE_CLIENT} -q "SELECT distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(data.inputs.:\`Array(JSON)\`))) as path from btc order by path"
+${CLICKHOUSE_CLIENT} -q "SELECT distinct arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(data.inputs.:\`Array(JSON)\`.prev_out.spending_outpoints.:\`Array(JSON)\`)))) as path from btc order by path"

 ${CLICKHOUSE_CLIENT} -q "SELECT avg(data.fee.:Int64), median(data.fee.:Int64) FROM btc"

@@ -0,0 +1,12 @@
+5000
+leonardomso/33-js-concepts 3
+ytdl-org/youtube-dl 3
+Bogdanp/neko 2
+bminossi/AllVideoPocsFromHackerOne 2
+disclose/diodata 2
+Commit 182
+chipeo345 119
+phanwi346 114
+Nicholas Piggin 95
+direwolf-github 49
+2
tests/queries/0_stateless/01825_new_type_json_ghdata.sh (new executable file, 26 lines)
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata"
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1
+
+cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata FORMAT JSONAsObject"
+
+${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)"
+
+${CLICKHOUSE_CLIENT} -q \
+    "SELECT data.repo.name, count() AS stars FROM ghdata \
+        WHERE data.type = 'WatchEvent' GROUP BY data.repo.name ORDER BY stars DESC, data.repo.name LIMIT 5"
+
+${CLICKHOUSE_CLIENT} -q \
+    "SELECT data.payload.commits[].author.name AS name, count() AS c FROM ghdata \
+        ARRAY JOIN data.payload.commits[].author.name \
+        GROUP BY name ORDER BY c DESC, name LIMIT 5"
+
+${CLICKHOUSE_CLIENT} -q "SELECT max(data.payload.pull_request.assignees[].size0) FROM ghdata"
+
+${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata"
@@ -19,8 +19,8 @@ cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO
 ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string"

 ${CLICKHOUSE_CLIENT} -q "SELECT \
-    (SELECT groupUniqArrayMap(JSONAllPathsWithTypes(data)), sum(cityHash64(toString(data))) FROM ghdata_2_from_string) = \
-    (SELECT groupUniqArrayMap(JSONAllPathsWithTypes(data)), sum(cityHash64(toString(data))) FROM ghdata_2)"
+    (SELECT mapSort(groupUniqArrayMap(JSONAllPathsWithTypes(data))), sum(cityHash64(toString(data))) FROM ghdata_2_from_string) = \
+    (SELECT mapSort(groupUniqArrayMap(JSONAllPathsWithTypes(data))), sum(cityHash64(toString(data))) FROM ghdata_2)"

 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2"
 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_string"
@@ -12,12 +12,12 @@
 ('results.drb','Int64')
 ('results.fg','Int64')
 ('results.fg3','Int64')
-('results.fg3_pct','DateTime64(9)')
+('results.fg3_pct','String')
 ('results.fg3a','Int64')
-('results.fg_pct','DateTime64(9)')
+('results.fg_pct','String')
 ('results.fga','Int64')
 ('results.ft','Int64')
-('results.ft_pct','DateTime64(9)')
+('results.ft_pct','String')
 ('results.fta','Int64')
 ('results.mp','Int64')
 ('results.orb','Int64')
@@ -28,7 +28,6 @@
 ('results.trb','Int64')
 ('score','Int64')
 ('won','Int64')
-('results.fg3_pct','String')
 Boston Celtics 70
 Los Angeles Lakers 64
 Milwaukee Bucks 61
@@ -41,10 +40,10 @@ Atlanta Hawks 55
 ('fg3','Int64')
 ('fg3_pct','String')
 ('fg3a','Int64')
-('fg_pct','DateTime64(9)')
+('fg_pct','String')
 ('fga','Int64')
 ('ft','Int64')
-('ft_pct','DateTime64(9)')
+('ft_pct','String')
 ('fta','Int64')
 ('mp','String')
 ('orb','Int64')
@@ -54,9 +53,6 @@ Atlanta Hawks 55
 ('stl','Int64')
 ('tov','Int64')
 ('trb','Int64')
-('fg3_pct','DateTime64(9)')
-('fg_pct','String')
-('ft_pct','String')
 Larry Bird 10
 Clyde Drexler 4
 Alvin Robertson 3
@@ -14,15 +14,15 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames (data JSON) ENGINE = MergeTree OR
 cat $CUR_DIR/data_json/nbagames_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames FORMAT JSONAsObject"

 ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM nbagames WHERE NOT ignore(*)"
-${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) from nbagames"
-${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(data.teams[]))) from nbagames"
+${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(data)) as path from nbagames order by path"
+${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(data.teams[]))) as path from nbagames order by path"

 ${CLICKHOUSE_CLIENT} -q \
     "SELECT teams.name.:String AS name, sum(teams.won.:Int64) AS wins FROM nbagames \
         ARRAY JOIN data.teams[] AS teams GROUP BY name \
         ORDER BY wins DESC LIMIT 5;"

-${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(data.teams[].players[])))) from nbagames"
+${CLICKHOUSE_CLIENT} -q "SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(arrayJoin(data.teams[].players[])))) as path from nbagames order by path"

 ${CLICKHOUSE_CLIENT} -q \
     "SELECT player, sum(triple_double) AS triple_doubles FROM \
@@ -1,9 +0,0 @@
-Tuple(\n a Tuple(\n b Int8,\n c Nested(d Int8, e Array(Int16), f Int8)))
-{"o":{"a":{"b":1,"c":[{"d":10,"e":[31],"f":0},{"d":20,"e":[63,127],"f":0}]}}}
-{"o":{"a":{"b":2,"c":[]}}}
-{"o":{"a":{"b":3,"c":[{"d":0,"e":[32],"f":20},{"d":0,"e":[64,128],"f":30}]}}}
-{"o":{"a":{"b":4,"c":[]}}}
-1 [10,20] [[31],[63,127]] [0,0]
-2 [] [] []
-3 [0,0] [[32],[64,128]] [20,30]
-4 [] [] []
@@ -4,7 +4,7 @@ SET allow_experimental_object_type = 1;
 SET output_format_json_named_tuples_as_objects = 1;

 DROP TABLE IF EXISTS t_json_10;
-CREATE TABLE t_json_10 (o JSON) ENGINE = Memory;
+CREATE TABLE t_json_10 (o Object('json')) ENGINE = Memory;

 INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 1, "c": [{"d": 10, "e": [31]}, {"d": 20, "e": [63, 127]}]}} {"a": {"b": 2, "c": []}}
 INSERT INTO t_json_10 FORMAT JSONAsObject {"a": {"b": 3, "c": [{"f": 20, "e": [32]}, {"f": 30, "e": [64, 128]}]}} {"a": {"b": 4, "c": []}}
@@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CUR_DIR"/../shell_config.sh

 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata"
-${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data Object('json')) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_object_type 1

 cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata FORMAT JSONAsObject"

@@ -39,7 +39,7 @@ ${CLICKHOUSE_CLIENT} -q \
     GROUP BY player ORDER BY triple_doubles DESC, player LIMIT 5"

 ${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames_string (data String) ENGINE = MergeTree ORDER BY tuple()"
-${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1
+${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames_from_string (data Object('json')) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1

 cat $CUR_DIR/data_json/nbagames_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames_string FORMAT JSONAsString"
 ${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames_from_string SELECT data FROM nbagames_string"
@@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh

 $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_json_async_insert"
-$CLICKHOUSE_CLIENT --allow_experimental_object_type=1 -q "CREATE TABLE t_json_async_insert (data Object(''json'')) ENGINE = MergeTree ORDER BY tuple()"
+$CLICKHOUSE_CLIENT --allow_experimental_object_type=1 -q "CREATE TABLE t_json_async_insert (data Object('json')) ENGINE = MergeTree ORDER BY tuple()"

 $CLICKHOUSE_CLIENT --async_insert=1 --wait_for_async_insert=1 -q 'INSERT INTO t_json_async_insert FORMAT JSONAsObject {"aaa"}' 2>&1 | grep -o -m1 "Cannot parse object"
 $CLICKHOUSE_CLIENT -q "SELECT count() FROM t_json_async_insert"
@@ -21,7 +21,7 @@ echo '
 }
 }' > 02482_object_data.jsonl

-$CLICKHOUSE_LOCAL --allow_experimental_object_type=1 -q "select * from file(02482_object_data.jsonl, auto, 'obj Object('json')')"
+$CLICKHOUSE_LOCAL --allow_experimental_object_type=1 -q "select * from file(02482_object_data.jsonl, auto, 'obj Object(''json'')')"

 rm 02482_object_data.jsonl

@@ -44,7 +44,7 @@ nested.col1 Array(String) NO \N
 nested.col2 Array(UInt32) NO \N
 nfs Nullable(FixedString(3)) YES \N
 ns Nullable(String) YES \N
-o Object(\'json\') NO \N
+o JSON NO \N
 p Point NO \N
 pg Polygon NO \N
 r Ring NO \N