ClickHouse/src/Formats/JSONUtils.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

136 lines
4.8 KiB
C++
Raw Normal View History

2022-05-06 16:48:48 +00:00
#pragma once
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <Formats/FormatSettings.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/Progress.h>
#include <Core/NamesAndTypes.h>
#include <Common/Stopwatch.h>
#include <utility>
namespace DB
{
/// Forward declaration: this header only refers to JSONInferenceInfo through a pointer
/// (see the readRowAndGet*DataTypes* functions), so the full definition is not needed here.
struct JSONInferenceInfo;
2022-05-18 09:25:26 +00:00
namespace JSONUtils
{
/// Segmentation entry points for the JSONEachRow and JSONCompactEachRow formats.
/// Both read from `in`, receive `memory` by non-const reference and return a (bool, size_t) pair.
/// The compact variant takes an additional `min_rows` argument.
/// NOTE(review): presumably the pair is (more input remains, number of rows in the segment)
/// and `memory` receives the bytes of the segment - confirm against the implementation.
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows);
std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows);
2022-05-18 09:25:26 +00:00
/// Row-skipping helpers, one per row-based JSON format.
/// NOTE(review): judging by the names, each call advances `in` past one row of the
/// corresponding format - confirm against the implementation.
void skipRowForJSONEachRow(ReadBuffer & in);
void skipRowForJSONCompactEachRow(ReadBuffer & in);
2022-05-18 09:25:26 +00:00
/// Reads one row in JSONEachRow format and tries to determine the type of each field.
/// Returns the list of field names together with their types.
/// If the type of some field cannot be determined, nullptr is returned for it.
/// `inference_info` is passed by pointer.
/// NOTE(review): whether nullptr is accepted for `inference_info` is not visible here - confirm.
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info);
2022-05-18 09:25:26 +00:00
/// Reads one row in JSONCompactEachRow format and tries to determine the type of each field.
/// Returns only the types (no names).
/// If the type of some field cannot be determined, nullptr is returned for it.
/// `inference_info` is passed by pointer.
/// NOTE(review): whether nullptr is accepted for `inference_info` is not visible here - confirm.
DataTypes readRowAndGetDataTypesForJSONCompactEachRow(ReadBuffer & in, const FormatSettings & settings, JSONInferenceInfo * inference_info);
2022-05-18 09:25:26 +00:00
/// Checker over the contents of `buf` for the JSONEachRow format; returns a bool.
/// NOTE(review): judging by the name, it reports whether the data has a non-trivial
/// prefix/suffix around the rows - confirm the exact criterion in the implementation.
bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf);
/// Reads one field from `in` into `column`, which is passed by non-const reference.
/// `type` and `serialization` are supplied separately by the caller; `column_name` is
/// passed alongside them.
/// NOTE(review): presumably `column_name` is only used for error messages, `yield_strings`
/// means "every value is encoded as a JSON string", and the bool result tells whether a
/// real (non-default) value was read - none of this is visible here, confirm in the .cpp.
bool readField(
ReadBuffer & in,
IColumn & column,
const DataTypePtr & type,
const SerializationPtr & serialization,
const String & column_name,
const FormatSettings & format_settings,
bool yield_strings);
/// Builds and returns a new list of strings from `names`; the input is not modified (const reference).
/// NOTE(review): judging by the name, each element is converted to a valid JSON string,
/// with optional UTF-8 validation controlled by `validate_utf8` - confirm in the .cpp.
Strings makeNamesValidJSONStrings(const Strings & names, const FormatSettings & settings, bool validate_utf8);
2022-05-18 09:25:26 +00:00
/// Helper functions for writing JSON data to a WriteBuffer.

/// Field delimiters. `new_lines` defaults to 1; the compact variant takes no extra arguments.
void writeFieldDelimiter(WriteBuffer & out, size_t new_lines = 1);
void writeFieldCompactDelimiter(WriteBuffer & out);
/// Start of a JSON object. `indent` defaults to 0 and `title` to nullptr.
/// NOTE(review): presumably a non-null `title` is written as the key that precedes the
/// object, and `indent` is a nesting depth - confirm in the .cpp.
void writeObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
2022-09-08 16:07:20 +00:00
/// Compact counterpart of writeObjectStart with the same parameters and defaults
/// (`indent` = 0, `title` = nullptr).
void writeCompactObjectStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
2022-05-18 09:25:26 +00:00
/// End of a JSON object; pairs with writeObjectStart. `indent` defaults to 0.
void writeObjectEnd(WriteBuffer & out, size_t indent = 0);
2022-09-08 16:07:20 +00:00
/// End of a compact JSON object; pairs with writeCompactObjectStart. Takes no indent.
void writeCompactObjectEnd(WriteBuffer & out);
2022-05-18 09:25:26 +00:00
/// Start/end of a JSON array, in regular and compact flavours.
/// The *Start functions accept an optional `indent` (default 0) and `title` (default nullptr);
/// writeArrayEnd accepts an optional `indent`, writeCompactArrayEnd takes only the buffer.
/// NOTE(review): presumably a non-null `title` is written as the key preceding the array - confirm.
void writeArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeCompactArrayStart(WriteBuffer & out, size_t indent = 0, const char * title = nullptr);
void writeArrayEnd(WriteBuffer & out, size_t indent = 0);
void writeCompactArrayEnd(WriteBuffer & out);
/// Writes a single field taken from `column` at row `row_num` to `out`, using `serialization`.
/// Optional arguments and their defaults:
///   `name`                  - std::nullopt;
///   `indent`                - 0;
///   `title_after_delimiter` - " ";
///   `pretty_json`           - false.
/// NOTE(review): presumably, when `name` is set it is written as the field title followed by
/// `title_after_delimiter`, and `yield_strings` forces the value to be written as a JSON
/// string - confirm against the implementation.
void writeFieldFromColumn(
    const IColumn & column,
    const ISerialization & serialization,
    size_t row_num,
    bool yield_strings,
    const FormatSettings & settings,
    WriteBuffer & out,
    const std::optional<String> & name = std::nullopt,
    size_t indent = 0,
    const char * title_after_delimiter = " ",
    bool pretty_json = false);
2022-05-18 09:25:26 +00:00
void writeColumns(
const Columns & columns,
const Names & names,
2022-05-18 09:25:26 +00:00
const Serializations & serializations,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out,
size_t indent = 0);
/// Compact counterpart of writeColumns: same arguments except that it takes neither
/// column names nor an indent.
/// NOTE(review): presumably `serializations[i]` corresponds to `columns[i]` - confirm.
void writeCompactColumns(
const Columns & columns,
const Serializations & serializations,
size_t row_num,
bool yield_strings,
const FormatSettings & settings,
WriteBuffer & out);
/// Writes metadata built from `names` and `types` to `out`.
/// NOTE(review): presumably `names[i]` pairs with `types[i]` and the output is the
/// section that readMetadata parses back - confirm against the implementation.
void writeMetadata(const Names & names, const DataTypes & types, const FormatSettings & settings, WriteBuffer & out);
2022-05-18 09:25:26 +00:00
/// Writes additional (non-row) information about the result to `out`.
/// Inputs are the row counters (`rows`, `rows_before_limit`), the `applied_limit` flag,
/// a Stopwatch, a Progress object and the `write_statistics` flag.
/// NOTE(review): presumably `rows_before_limit` is only written when `applied_limit` is
/// true, and `watch`/`progress` are only used when `write_statistics` is true - confirm.
void writeAdditionalInfo(
size_t rows,
size_t rows_before_limit,
bool applied_limit,
const Stopwatch & watch,
const Progress & progress,
bool write_statistics,
WriteBuffer & out);
/// Writes `exception_message` to `out`. `indent` defaults to 0.
/// NOTE(review): the exact shape of the output (e.g. the key used for the message) is not
/// visible here - confirm against the implementation.
void writeException(const String & exception_message, WriteBuffer & out, const FormatSettings & settings, size_t indent = 0);
/// Low-level reading helpers operating on `in`.
/// NOTE(review): judging by the names, skipColon/skipComma consume the respective JSON
/// punctuation and readFieldName returns the next object key; whether they also skip
/// whitespace or throw on mismatch is not visible here - confirm.
void skipColon(ReadBuffer & in);
void skipComma(ReadBuffer & in);
String readFieldName(ReadBuffer & in);
/// Array bracket helpers. The skip* variants return nothing; the checkAndSkip* variants
/// return a bool.
/// NOTE(review): presumably skip* require the bracket to be present (failing otherwise),
/// while checkAndSkip* return false without failing when it is absent - confirm.
void skipArrayStart(ReadBuffer & in);
void skipArrayEnd(ReadBuffer & in);
bool checkAndSkipArrayStart(ReadBuffer & in);
bool checkAndSkipArrayEnd(ReadBuffer & in);
/// Object brace helpers, mirroring the array ones. Note that only the "end" side has a
/// checkAndSkip* variant declared here.
/// NOTE(review): presumably skip* require the brace to be present, while
/// checkAndSkipObjectEnd returns false without failing when it is absent - confirm.
void skipObjectStart(ReadBuffer & in);
void skipObjectEnd(ReadBuffer & in);
bool checkAndSkipObjectEnd(ReadBuffer & in);
/// Metadata reading and validation.
/// readMetadata returns the names and types read from `in`.
/// validateMetadataByHeader takes such a list together with a `header` Block and returns nothing.
/// NOTE(review): presumably validateMetadataByHeader throws on mismatch, and
/// readMetadataAndValidateHeader is readMetadata followed by that validation - confirm.
NamesAndTypesList readMetadata(ReadBuffer & in);
NamesAndTypesList readMetadataAndValidateHeader(ReadBuffer & in, const Block & header);
void validateMetadataByHeader(const NamesAndTypesList & names_and_types_from_metadata, const Block & header);
/// Navigation inside a JSON object read from `in`.
/// NOTE(review): judging by the names, skipUntilFieldInObject advances until the key
/// `desired_field_name` and returns whether it was found, and skipTheRestOfObject consumes
/// everything up to the end of the current object - confirm against the implementation.
bool skipUntilFieldInObject(ReadBuffer & in, const String & desired_field_name);
void skipTheRestOfObject(ReadBuffer & in);
2022-05-18 09:25:26 +00:00
}
2022-05-06 16:48:48 +00:00
}