Merge branch 'ClickHouse:master' into fix_datetime64_in_datetime64

commit fd3723a727
Author: Yarik Briukhovetskyi, 2024-12-02 12:19:09 +01:00 (committed by GitHub)
40 changed files with 334 additions and 99 deletions

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit c2b0811f164a7948208489562dab4f186eb305ce
Subproject commit ce6de271811899d587fc28b500041ebcf720014f


@ -192,7 +192,7 @@ struct AggregateFunctionGroupArrayIntersectGenericData
* For such columns GroupArrayIntersect() can be implemented more efficiently (especially for small numeric arrays).
*/
template <bool is_plain_column = false>
class AggregateFunctionGroupArrayIntersectGeneric
class AggregateFunctionGroupArrayIntersectGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData,
AggregateFunctionGroupArrayIntersectGeneric<is_plain_column>>
{
@ -353,7 +353,7 @@ namespace
{
/// Substitute return type for Date and DateTime
class AggregateFunctionGroupArrayIntersectDate : public AggregateFunctionGroupArrayIntersect<DataTypeDate::FieldType>
class AggregateFunctionGroupArrayIntersectDate final : public AggregateFunctionGroupArrayIntersect<DataTypeDate::FieldType>
{
public:
explicit AggregateFunctionGroupArrayIntersectDate(const DataTypePtr & argument_type, const Array & parameters_)
@ -361,7 +361,7 @@ public:
static DataTypePtr createResultType() { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
};
class AggregateFunctionGroupArrayIntersectDateTime : public AggregateFunctionGroupArrayIntersect<DataTypeDateTime::FieldType>
class AggregateFunctionGroupArrayIntersectDateTime final : public AggregateFunctionGroupArrayIntersect<DataTypeDateTime::FieldType>
{
public:
explicit AggregateFunctionGroupArrayIntersectDateTime(const DataTypePtr & argument_type, const Array & parameters_)
@ -369,7 +369,7 @@ public:
static DataTypePtr createResultType() { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()); }
};
class AggregateFunctionGroupArrayIntersectDate32 : public AggregateFunctionGroupArrayIntersect<DataTypeDate32::FieldType>
class AggregateFunctionGroupArrayIntersectDate32 final : public AggregateFunctionGroupArrayIntersect<DataTypeDate32::FieldType>
{
public:
explicit AggregateFunctionGroupArrayIntersectDate32(const DataTypePtr & argument_type, const Array & parameters_)


@ -153,7 +153,7 @@ static void deserializeAndInsertImpl(StringRef str, IColumn & data_to);
* For such columns groupUniqArray() can be implemented more efficiently (especially for small numeric arrays).
*/
template <bool is_plain_column = false, typename LimitNumElems = std::false_type>
class AggregateFunctionGroupUniqArrayGeneric
class AggregateFunctionGroupUniqArrayGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionGroupUniqArrayGenericData,
AggregateFunctionGroupUniqArrayGeneric<is_plain_column, LimitNumElems>>
{
@ -245,7 +245,7 @@ public:
/// Substitute return type for Date and DateTime
template <typename HasLimit>
class AggregateFunctionGroupUniqArrayDate : public AggregateFunctionGroupUniqArray<DataTypeDate::FieldType, HasLimit>
class AggregateFunctionGroupUniqArrayDate final : public AggregateFunctionGroupUniqArray<DataTypeDate::FieldType, HasLimit>
{
public:
explicit AggregateFunctionGroupUniqArrayDate(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
@ -254,7 +254,7 @@ public:
};
template <typename HasLimit>
class AggregateFunctionGroupUniqArrayDateTime : public AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType, HasLimit>
class AggregateFunctionGroupUniqArrayDateTime final : public AggregateFunctionGroupUniqArray<DataTypeDateTime::FieldType, HasLimit>
{
public:
explicit AggregateFunctionGroupUniqArrayDateTime(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())
@ -263,7 +263,7 @@ public:
};
template <typename HasLimit>
class AggregateFunctionGroupUniqArrayIPv4 : public AggregateFunctionGroupUniqArray<DataTypeIPv4::FieldType, HasLimit>
class AggregateFunctionGroupUniqArrayIPv4 final : public AggregateFunctionGroupUniqArray<DataTypeIPv4::FieldType, HasLimit>
{
public:
explicit AggregateFunctionGroupUniqArrayIPv4(const DataTypePtr & argument_type, const Array & parameters_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max())


@ -28,7 +28,7 @@ namespace
/// Returns tuple of (z-statistic, p-value, confidence-interval-low, confidence-interval-high)
template <typename Data>
class AggregateFunctionMeanZTest :
class AggregateFunctionMeanZTest final :
public IAggregateFunctionDataHelper<Data, AggregateFunctionMeanZTest<Data>>
{
private:


@ -47,7 +47,7 @@ struct RankCorrelationData : public StatisticalSample<Float64, Float64>
}
};
class AggregateFunctionRankCorrelation :
class AggregateFunctionRankCorrelation final :
public IAggregateFunctionDataHelper<RankCorrelationData, AggregateFunctionRankCorrelation>
{
public:


@ -38,7 +38,7 @@ namespace ErrorCodes
/// Returns tuple of (t-statistic, p-value)
/// https://cpb-us-w2.wpmucdn.com/voices.uchicago.edu/dist/9/1193/files/2016/01/05b-TandP.pdf
template <typename Data>
class AggregateFunctionTTest :
class AggregateFunctionTTest final :
public IAggregateFunctionDataHelper<Data, AggregateFunctionTTest<Data>>
{
private:


@ -203,7 +203,7 @@ struct AggregateFunctionTopKGenericData
* For such columns topK() can be implemented more efficiently (especially for small numeric arrays).
*/
template <bool is_plain_column, bool is_weighted>
class AggregateFunctionTopKGeneric
class AggregateFunctionTopKGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>
{
private:
@ -367,7 +367,7 @@ public:
/// Substitute return type for Date and DateTime
template <bool is_weighted>
class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>
class AggregateFunctionTopKDate final : public AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>
{
public:
using AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>::AggregateFunctionTopK;
@ -384,7 +384,7 @@ public:
};
template <bool is_weighted>
class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>
class AggregateFunctionTopKDateTime final : public AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>
{
public:
using AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>::AggregateFunctionTopK;
@ -401,7 +401,7 @@ public:
};
template <bool is_weighted>
class AggregateFunctionTopKIPv4 : public AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>
class AggregateFunctionTopKIPv4 final : public AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>
{
public:
using AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>::AggregateFunctionTopK;


@ -150,7 +150,7 @@ struct AggregateFunctionDistinctMultipleGenericData : public AggregateFunctionDi
* Adding -Distinct suffix to aggregate function
**/
template <typename Data>
class AggregateFunctionDistinct : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct<Data>>
class AggregateFunctionDistinct final : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct<Data>>
{
private:
AggregateFunctionPtr nested_func;


@ -114,7 +114,7 @@ struct CrossTabData
template <typename Data>
class AggregateFunctionCrossTab : public IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>
class AggregateFunctionCrossTab final : public IAggregateFunctionDataHelper<Data, AggregateFunctionCrossTab<Data>>
{
public:
explicit AggregateFunctionCrossTab(const DataTypes & arguments)


@ -176,6 +176,8 @@ public:
/// Serializes state (to transmit it over the network, for example).
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
virtual void serializeBatch(const PaddedPODArray<AggregateDataPtr> & data, size_t start, size_t size, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
/// Deserializes state. This function is called only for empty (just created) states.
virtual void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> version = std::nullopt, Arena * arena = nullptr) const = 0; /// NOLINT
@ -471,6 +473,12 @@ public:
}
}
void serializeBatch(const PaddedPODArray<AggregateDataPtr> & data, size_t start, size_t size, WriteBuffer & buf, std::optional<size_t> version) const override // NOLINT
{
for (size_t i = start; i < size; ++i)
static_cast<const Derived *>(this)->serialize(data[i], buf, version);
}
void addBatchSparse(
size_t row_begin,
size_t row_end,


@ -418,9 +418,10 @@ WeakHash32 ColumnAggregateFunction::getWeakHash32() const
void ColumnAggregateFunction::updateHashFast(SipHash & hash) const
{
/// Fallback to per-element hashing, as there is no faster way
for (size_t i = 0; i < size(); ++i)
updateHashWithValue(i, hash);
WriteBufferFromOwnString wbuf;
const ColumnAggregateFunction::Container & vec = getData();
func->serializeBatch(vec, 0, size(), wbuf);
hash.update(wbuf.str().c_str(), wbuf.str().size());
}
/// The returned size is less than real size. The reason is that some parts of


@ -266,7 +266,7 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
}
const size_t voting_members = get_num_voting_members();
const auto not_responding_peers = get_not_responding_peers();
const auto not_responding_peers = get_not_responding_peers_count();
const auto quorum_size = voting_members / 2 + 1;
const auto max_not_responding_peers = voting_members - quorum_size;
@ -303,6 +303,11 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
return std::unique_lock(lock_);
}
std::unique_lock<std::mutex> lockCommit()
{
return std::unique_lock(commit_lock_);
}
bool isCommitInProgress() const
{
return sm_commit_exec_in_progress_;
@ -1228,6 +1233,7 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const
uint64_t KeeperServer::createSnapshot()
{
auto commit_lock = raft_instance->lockCommit();
uint64_t log_idx = raft_instance->create_snapshot();
if (log_idx != 0)
LOG_INFO(log, "Snapshot creation scheduled with last committed log index {}.", log_idx);


@ -142,13 +142,14 @@ void KeeperStateMachine<Storage>::init()
}
catch (...)
{
tryLogCurrentException(
LOG_FATAL(
log,
fmt::format(
"Aborting because of failure to load from latest snapshot with index {}. Problematic snapshot can be removed but it will "
"lead to data loss",
latest_log_index));
std::abort();
"Failure to load from latest snapshot with index {}: {}",
latest_log_index,
getCurrentExceptionMessage(true, true, false));
LOG_FATAL(
log, "Manual intervention is necessary for recovery. Problematic snapshot can be removed but it will lead to data loss");
abort();
}
}
@ -427,8 +428,13 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to preprocess stored log at index {}, aborting to avoid inconsistent state", request_for_session.log_idx));
std::abort();
LOG_FATAL(
log,
"Failed to preprocess stored log at index {}: {}",
request_for_session.log_idx,
getCurrentExceptionMessage(true, true, false));
LOG_FATAL(log, "Aborting to avoid inconsistent state");
abort();
}
if (keeper_context->digestEnabled() && request_for_session.digest)


@ -8,6 +8,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
template<typename V>
struct ListNode
{
@ -292,7 +297,8 @@ public:
{
size_t hash_value = map.hash(key);
auto it = map.find(key, hash_value);
chassert(it != map.end());
if (it == map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not find key: '{}'", key);
auto list_itr = it->getMapped();
uint64_t old_value_size = list_itr->value.sizeInBytes();
@ -348,7 +354,8 @@ public:
const V & getValue(StringRef key) const
{
auto it = map.find(key);
chassert(it);
if (it == map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not find key: '{}'", key);
return it->getMapped()->value;
}
@ -356,7 +363,8 @@ public:
{
for (auto & itr : snapshot_invalid_iters)
{
chassert(!itr->isActiveInMap());
if (itr->isActiveInMap())
throw Exception(ErrorCodes::LOGICAL_ERROR, "{} is not active in map", itr->key);
updateDataSize(ERASE, itr->key.size, 0, itr->value.sizeInBytes(), /*remove_old=*/true);
if (itr->getFreeKey())
arena.free(const_cast<char *>(itr->key.data), itr->key.size);


@ -1242,6 +1242,9 @@ Set the quoting rule for identifiers in SHOW CREATE query
)", 0) \
DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"(
Set the quoting style for identifiers in SHOW CREATE query
)", 0) \
DECLARE(String, composed_data_type_output_format_mode, "default", R"(
Sets the text output format for composed data types (Array, Map, Tuple): 'default' or 'spark'.
)", 0) \
// End of FORMAT_FACTORY_SETTINGS

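For orientation, a small, hedged sketch of what the two modes produce. The spark-mode result matches the reference output added later in this commit; the default-mode result is the usual quoted ClickHouse rendering, shown here only for contrast.

SELECT toString(map('1343', 'fe', 'afe', 'fefe'));
-- default mode: {'1343':'fe','afe':'fefe'}

SELECT toString(map('1343', 'fe', 'afe', 'fefe'))
SETTINGS composed_data_type_output_format_mode = 'spark';
-- spark mode: {1343 -> fe, afe -> fefe}
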

@ -1816,6 +1816,22 @@ Possible values:
- 0 Disabled.
- 1 Enabled.
)", 0) \
DECLARE(Map, http_response_headers, "", R"(
Allows adding or overriding HTTP headers which the server will return in the response to a successful query result.
This only affects the HTTP interface.
If the header is already set by default, the provided value overrides it.
If the header was not set by default, it is added to the list of headers.
Headers that are set by the server by default and not overridden by this setting remain as they are.
The setting lets you set a header to a constant value; there is currently no way to set a header to a dynamically calculated value.
Neither names nor values can contain ASCII control characters.
If you implement a UI application that lets users modify settings but also makes decisions based on the returned headers, it is recommended to restrict this setting to readonly.
Example: `SET http_response_headers = '{"Content-Type": "image/png"}'`
)", 0) \
\
DECLARE(String, count_distinct_implementation, "uniqExact", R"(

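A hedged usage sketch for the new setting, mirroring the shell test added later in this commit; the headers are only applied on the HTTP interface, and the Map value can be supplied per query or via SET.

-- Over HTTP, the response to this query would also carry "My-New-Header: Hello, world."
SELECT 1 SETTINGS http_response_headers = $${'My-New-Header':'Hello, world.'}$$;

-- Overriding a default header, as in the setting's documentation above
SET http_response_headers = '{"Content-Type": "image/png"}';
SELECT 1;
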

@ -67,6 +67,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"max_bytes_ratio_before_external_group_by", 0., 0., "New setting."},
{"max_bytes_ratio_before_external_sort", 0., 0., "New setting."},
{"use_async_executor_for_materialized_views", false, false, "New setting."},
{"composed_data_type_output_format_mode", "default", "default", "New setting"},
{"http_response_headers", "", "", "New setting."},
}
},
{"24.11",


@ -60,12 +60,11 @@ void SerializationAggregateFunction::serializeBinaryBulk(const IColumn & column,
const ColumnAggregateFunction & real_column = typeid_cast<const ColumnAggregateFunction &>(column);
const ColumnAggregateFunction::Container & vec = real_column.getData();
ColumnAggregateFunction::Container::const_iterator it = vec.begin() + offset;
ColumnAggregateFunction::Container::const_iterator end = limit ? it + limit : vec.end();
size_t end = vec.size();
if (limit)
end = std::min(end, offset + limit);
end = std::min(end, vec.end());
for (; it != end; ++it)
function->serialize(*it, ostr, version);
function->serializeBatch(vec, offset, end, ostr, version);
}
void SerializationAggregateFunction::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const


@ -401,7 +401,7 @@ void SerializationArray::deserializeBinaryBulkWithMultipleStreams(
template <typename Writer>
static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested)
static void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, Writer && write_nested)
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
@ -412,10 +412,14 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe
const IColumn & nested_column = column_array.getData();
writeChar('[', ostr);
for (size_t i = offset; i < next_offset; ++i)
if (next_offset != offset)
write_nested(nested_column, offset);
for (size_t i = offset + 1; i < next_offset; ++i)
{
if (i != offset)
writeChar(',', ostr);
writeChar(',', ostr);
if (settings.composed_data_type_output_format_mode == "spark")
writeChar(' ', ostr);
write_nested(nested_column, i);
}
writeChar(']', ostr);
@ -520,10 +524,13 @@ static ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reade
void SerializationArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
serializeTextImpl(column, row_num, settings, ostr,
[&](const IColumn & nested_column, size_t i)
{
nested->serializeTextQuoted(nested_column, i, ostr, settings);
if (settings.composed_data_type_output_format_mode == "spark")
nested->serializeText(nested_column, i, ostr, settings);
else
nested->serializeTextQuoted(nested_column, i, ostr, settings);
});
}


@ -90,6 +90,7 @@ template <typename KeyWriter, typename ValueWriter>
void SerializationMap::serializeTextImpl(
const IColumn & column,
size_t row_num,
const FormatSettings & settings,
WriteBuffer & ostr,
KeyWriter && key_writer,
ValueWriter && value_writer) const
@ -104,15 +105,31 @@ void SerializationMap::serializeTextImpl(
size_t next_offset = offsets[row_num];
writeChar('{', ostr);
for (size_t i = offset; i < next_offset; ++i)
if (offset != next_offset)
{
if (i != offset)
writeChar(',', ostr);
key_writer(ostr, key, nested_tuple.getColumn(0), i);
writeChar(':', ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), i);
key_writer(ostr, key, nested_tuple.getColumn(0), offset);
if (settings.composed_data_type_output_format_mode == "spark")
writeString(std::string_view(" -> "), ostr);
else
writeChar(':', ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), offset);
}
if (settings.composed_data_type_output_format_mode == "spark")
for (size_t i = offset + 1; i < next_offset; ++i)
{
writeString(std::string_view(", "), ostr);
key_writer(ostr, key, nested_tuple.getColumn(0), i);
writeString(std::string_view(" -> "), ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), i);
}
else
for (size_t i = offset + 1; i < next_offset; ++i)
{
writeChar(',', ostr);
key_writer(ostr, key, nested_tuple.getColumn(0), i);
writeChar(':', ostr);
value_writer(ostr, value, nested_tuple.getColumn(1), i);
}
writeChar('}', ostr);
}
@ -221,10 +238,13 @@ void SerializationMap::serializeText(const IColumn & column, size_t row_num, Wri
{
auto writer = [&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
if (settings.composed_data_type_output_format_mode == "spark")
subcolumn_serialization->serializeText(subcolumn, pos, buf, settings);
else
subcolumn_serialization->serializeTextQuoted(subcolumn, pos, buf, settings);
};
serializeTextImpl(column, row_num, ostr, writer, writer);
serializeTextImpl(column, row_num, settings, ostr, writer, writer);
}
void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const
@ -266,7 +286,7 @@ bool SerializationMap::tryDeserializeText(IColumn & column, ReadBuffer & istr, c
void SerializationMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
serializeTextImpl(column, row_num, settings, ostr,
[&settings](WriteBuffer & buf, const SerializationPtr & subcolumn_serialization, const IColumn & subcolumn, size_t pos)
{
/// We need to double-quote all keys (including integers) to produce valid JSON.


@ -70,7 +70,7 @@ public:
private:
template <typename KeyWriter, typename ValueWriter>
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
void serializeTextImpl(const IColumn & column, size_t row_num, const FormatSettings & settings, WriteBuffer & ostr, KeyWriter && key_writer, ValueWriter && value_writer) const;
template <typename ReturnType = void, typename Reader>
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;


@ -137,12 +137,25 @@ void SerializationTuple::deserializeBinary(IColumn & column, ReadBuffer & istr,
void SerializationTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('(', ostr);
for (size_t i = 0; i < elems.size(); ++i)
if (!elems.empty())
{
if (i != 0)
writeChar(',', ostr);
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
if (settings.composed_data_type_output_format_mode == "spark")
elems[0]->serializeText(extractElementColumn(column, 0), row_num, ostr, settings);
else
elems[0]->serializeTextQuoted(extractElementColumn(column, 0), row_num, ostr, settings);
}
if (settings.composed_data_type_output_format_mode == "spark")
for (size_t i = 1; i < elems.size(); ++i)
{
writeString(std::string_view(", "), ostr);
elems[i]->serializeText(extractElementColumn(column, i), row_num, ostr, settings);
}
else
for (size_t i = 1; i < elems.size(); ++i)
{
writeChar(',', ostr);
elems[i]->serializeTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
}
writeChar(')', ostr);
}


@ -251,6 +251,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.values.deduce_templates_of_expressions = settings[Setting::input_format_values_deduce_templates_of_expressions];
format_settings.values.interpret_expressions = settings[Setting::input_format_values_interpret_expressions];
format_settings.values.escape_quote_with_quote = settings[Setting::output_format_values_escape_quote_with_quote];
format_settings.composed_data_type_output_format_mode = settings[Setting::composed_data_type_output_format_mode];
format_settings.with_names_use_header = settings[Setting::input_format_with_names_use_header];
format_settings.with_types_use_header = settings[Setting::input_format_with_types_use_header];
format_settings.write_statistics = settings[Setting::output_format_write_statistics];


@ -38,6 +38,7 @@ struct FormatSettings
bool try_infer_variant = false;
bool seekable_read = true;
String composed_data_type_output_format_mode = "default";
UInt64 max_rows_to_read_for_schema_inference = 25000;
UInt64 max_bytes_to_read_for_schema_inference = 32 * 1024 * 1024;


@ -63,7 +63,7 @@ MarksInCompressedFile::MarksInCompressedFile(const PlainArray & marks)
// Overallocate by +1 element to let the bit packing/unpacking do less bounds checking.
size_t packed_length = (packed_bits + 63) / 64 + 1;
packed.reserve(packed_length);
packed.reserve_exact(packed_length);
packed.resize_fill(packed_length);
// Second pass: write out the packed marks.
@ -97,7 +97,7 @@ std::tuple<const MarksInCompressedFile::BlockInfo *, size_t> MarksInCompressedFi
size_t MarksInCompressedFile::approximateMemoryUsage() const
{
return sizeof(*this) + blocks.size() * sizeof(blocks[0]) + packed.size() * sizeof(packed[0]);
return sizeof(*this) + blocks.allocated_bytes() + packed.allocated_bytes();
}
}


@ -35,18 +35,29 @@ TEST(Marks, Compression)
EXPECT_LE((marks.approximateMemoryUsage() - sizeof(MarksInCompressedFile)) * 8, plain.size() * max_bits_per_mark);
};
// Typical.
test(gen(10000, 1'000'000, 0), 30);
{
SCOPED_TRACE("Typical");
test(gen(10000, 1'000'000, 0), 30);
}
// Completely random 64-bit values.
test(gen(10000, UINT64_MAX - 1, UINT64_MAX - 1), 130);
// All zeros.
test(gen(10000, 0, 0), 2);
{
SCOPED_TRACE("Completely random 64-bit values");
test(gen(10000, UINT64_MAX - 1, UINT64_MAX - 1), 130);
}
// Short.
test(gen(10, 1000, 1000), 65);
{
SCOPED_TRACE("All zeros");
test(gen(10000, 0, 0), 2);
}
// Empty.
test(gen(0, 0, 0), 0);
{
SCOPED_TRACE("Short");
test(gen(10, 1000, 1000), 65);
}
{
SCOPED_TRACE("Empty");
test(gen(0, 0, 0), 0);
}
}


@ -1633,29 +1633,29 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (isReplicated(*inner_table_engine))
is_storage_replicated = true;
}
}
}
bool allow_heavy_populate = getContext()->getSettingsRef()[Setting::database_replicated_allow_heavy_create] && create.is_populate;
if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
bool allow_heavy_populate = getContext()->getSettingsRef()[Setting::database_replicated_allow_heavy_create] && create.is_populate;
if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate))
{
const bool allow_create_select_for_replicated
= (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated;
if (!allow_create_select_for_replicated)
{
const bool allow_create_select_for_replicated
= (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated;
if (!allow_create_select_for_replicated)
{
/// POPULATE can be enabled with setting, provide hint in error message
if (create.is_populate)
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT "
"queries. "
"Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with "
"caution");
/// POPULATE can be enabled with setting, provide hint in error message
if (create.is_populate)
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE AS SELECT is not supported with Replicated databases. Consider using separate CREATE and INSERT queries.");
}
"CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT "
"queries. "
"Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with "
"caution");
throw Exception(
ErrorCodes::SUPPORT_IS_DISABLED,
"CREATE AS SELECT is not supported with Replicated databases. Consider using separate CREATE and INSERT queries.");
}
}
if (create.is_clone_as)
{


@ -163,6 +163,7 @@ namespace Setting
extern const SettingsSeconds wait_for_async_insert_timeout;
extern const SettingsBool implicit_select;
extern const SettingsBool enforce_strict_identifier_format;
extern const SettingsMap http_response_headers;
}
namespace ErrorCodes
@ -179,6 +180,7 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
extern const int SUPPORT_IS_DISABLED;
extern const int INCORRECT_QUERY;
extern const int BAD_ARGUMENTS;
}
namespace FailPoints
@ -1682,6 +1684,33 @@ void executeQuery(
/// But `session_timezone` setting could be modified in the query itself, so we update the value.
result_details.timezone = DateLUT::instance().getTimeZone();
const Map & additional_http_headers = context->getSettingsRef()[Setting::http_response_headers].value;
if (!additional_http_headers.empty())
{
for (const auto & key_value : additional_http_headers)
{
if (key_value.getType() != Field::Types::Tuple
|| key_value.safeGet<Tuple>().size() != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the `http_response_headers` setting must be a Map");
if (key_value.safeGet<Tuple>().at(0).getType() != Field::Types::String)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The keys of the `http_response_headers` setting must be Strings");
if (key_value.safeGet<Tuple>().at(1).getType() != Field::Types::String)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The values of the `http_response_headers` setting must be Strings");
String key = key_value.safeGet<Tuple>().at(0).safeGet<String>();
String value = key_value.safeGet<Tuple>().at(1).safeGet<String>();
if (std::find_if(key.begin(), key.end(), isControlASCII) != key.end()
|| std::find_if(value.begin(), value.end(), isControlASCII) != value.end())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The keys and values of the `http_response_headers` setting cannot contain ASCII control characters");
if (!result_details.additional_headers.emplace(key, value).second)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are duplicate entries in the `http_response_headers` setting");
}
}
auto & pipeline = streams.pipeline;
std::unique_ptr<WriteBuffer> compressed_buffer;


@ -24,6 +24,7 @@ struct QueryResultDetails
std::optional<String> content_type = {};
std::optional<String> format = {};
std::optional<String> timezone = {};
std::unordered_map<String, String> additional_headers = {};
};
using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>;
@ -42,7 +43,7 @@ void executeQuery(
WriteBuffer & ostr, /// Where to write query output to.
bool allow_into_outfile, /// If true and the query contains INTO OUTFILE section, redirect output to that file.
ContextMutablePtr context, /// DB, tables, data types, storage engines, functions, aggregate functions...
SetResultDetailsFunc set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone.
SetResultDetailsFunc set_result_details, /// If a non-empty callback is passed, it will be called with the query id, the content-type, the format, and the timezone, as well as additional headers.
QueryFlags flags = {},
const std::optional<FormatSettings> & output_format_settings = std::nullopt, /// Format settings for output format, will be calculated from the context if not set.
HandleExceptionInOutputFormatFunc handle_exception_in_output_format = {} /// If a non-empty callback is passed, it will be called on exception with created output format.


@ -524,6 +524,9 @@ void HTTPHandler::processQuery(
if (details.timezone)
response.add("X-ClickHouse-Timezone", *details.timezone);
for (const auto & [name, value] : details.additional_headers)
response.set(name, value);
};
auto handle_exception_in_output_format = [&](IOutputFormat & current_output_format,


@ -44,8 +44,6 @@ MergeTreeSink::~MergeTreeSink()
if (!delayed_chunk)
return;
chassert(isCancelled());
for (auto & partition : delayed_chunk->partitions)
{
partition.temp_part.cancel();


@ -118,7 +118,7 @@ struct MergedBlockOutputStream::Finalizer::Impl
}
void finish();
void cancel() noexcept;
void cancel();
};
void MergedBlockOutputStream::Finalizer::finish()
@ -129,7 +129,7 @@ void MergedBlockOutputStream::Finalizer::finish()
to_finish->finish();
}
void MergedBlockOutputStream::Finalizer::cancel() noexcept
void MergedBlockOutputStream::Finalizer::cancel()
{
std::unique_ptr<Impl> to_cancel = std::move(impl);
impl.reset();
@ -166,7 +166,7 @@ void MergedBlockOutputStream::Finalizer::Impl::finish()
part->getDataPartStorage().removeFile(file_name);
}
void MergedBlockOutputStream::Finalizer::Impl::cancel() noexcept
void MergedBlockOutputStream::Finalizer::Impl::cancel()
{
writer.cancel();
@ -182,8 +182,15 @@ MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr<Impl> impl_) : imp
MergedBlockOutputStream::Finalizer::~Finalizer()
{
if (impl)
cancel();
try
{
if (impl)
finish();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}


@ -54,7 +54,7 @@ public:
~Finalizer();
void finish();
void cancel() noexcept;
void cancel();
};
/// Finalize writing part and fill inner structures


@ -44,10 +44,19 @@
<election_timeout_upper_bound_ms>5000</election_timeout_upper_bound_ms>
<raft_logs_level>information</raft_logs_level>
<force_sync>false</force_sync>
<async_replication>1</async_replication>
<!-- we want all logs for complex problems investigation -->
<reserved_log_items>1000000000000000</reserved_log_items>
</coordination_settings>
<feature_flags>
<filtered_list>1</filtered_list>
<multi_read>1</multi_read>
<check_not_exists>1</check_not_exists>
<create_if_not_exists>1</create_if_not_exists>
<remove_recursive>1</remove_recursive>
</feature_flags>
<raft_configuration>
<server>
<id>1</id>


@ -191,8 +191,9 @@ def test_invalid_snapshot(started_cluster):
]
)
node.start_clickhouse(start_wait_sec=120, expected_to_fail=True)
assert node.contains_in_log("Failure to load from latest snapshot with index")
assert node.contains_in_log(
"Aborting because of failure to load from latest snapshot with index"
"Manual intervention is necessary for recovery. Problematic snapshot can be removed but it will lead to data loss"
)
node.stop_clickhouse()


@ -0,0 +1,10 @@
<test>
<create_query>DROP TABLE IF EXISTS test_avg_insert</create_query>
<create_query>
CREATE TABLE test_avg_insert (key UInt64, value AggregateFunction(avg, UInt8)) ENGINE = Memory()
</create_query>
<query>INSERT INTO test_avg_insert with initializeAggregation('avgState', 1) as s select number, s AS value FROM numbers(200000000)</query>
<drop_query>DROP TABLE IF EXISTS test_avg_insert</drop_query>
</test>


@ -0,0 +1,16 @@
-- array format --
[\'1\']
[1, 2, abc, \'1\']
[1, 2, abc, \'1\']
[1, 2, abc, \'1\']
[1, 2, abc, \'1\']
-- map format --
{1343 -> fe, afe -> fefe}
{1343 -> fe, afe -> fefe}
{1343 -> fe, afe -> fefe}
{1343 -> fe, afe -> fefe}
-- tuple format --
(1, 3, abc)
(1, 3, abc)
(1, 3, abc)
(1, 3, abc)


@ -0,0 +1,18 @@
SELECT '-- array format --';
SELECT CAST(array('\'1\'') , 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), '2', 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), materialize('2'), 'abc', '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), '\'1\''], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT CAST([materialize('1'), materialize('2'), materialize('abc'), materialize('\'1\'')], 'String') SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT '-- map format --';
SELECT toString(map('1343', 'fe', 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString(map(materialize('1343'), materialize('fe'), 'afe', 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), 'fefe')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString(map(materialize('1343'), materialize('fe'), materialize('afe'), materialize('fefe'))) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT '-- tuple format --';
SELECT toString(('1', '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString((materialize('1'), '3', 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString((materialize('1'), materialize('3'), 'abc')) SETTINGS composed_data_type_output_format_mode = 'spark';
SELECT toString((materialize('1'), materialize('3'), materialize('abc'))) SETTINGS composed_data_type_output_format_mode = 'spark';


@ -0,0 +1,17 @@
We can add a new header:
> POST /?http_response_headers={'My-New-Header':'Hello,+world.'} HTTP/1.1
< My-New-Header: Hello, world.
It works even with the settings clause:
< My-New-Header: Hello, world.
Check the default header value:
> Content-Type: application/x-www-form-urlencoded
< Content-Type: text/tab-separated-values; charset=UTF-8
Check that we can override it:
> POST /?http_response_headers={'Content-Type':'image/png'} HTTP/1.1
> Content-Type: application/x-www-form-urlencoded
< Content-Type: image/png
It does not allow bad characters:
BAD_ARGUMENTS
BAD_ARGUMENTS
It does not let duplicate entries:
BAD_ARGUMENTS


@ -0,0 +1,24 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
echo "We can add a new header:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/?http_response_headers={'My-New-Header':'Hello,+world.'}" -d "SELECT 1" 2>&1 | grep -i 'My-New'
echo "It works even with the settings clause:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'My-New-Header':'Hello, world.'}\$\$" 2>&1 | grep -i 'My-New'
echo "Check the default header value:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1" 2>&1 | grep -i 'Content-Type'
echo "Check that we can override it:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/?http_response_headers={'Content-Type':'image/png'}" -d "SELECT 1" 2>&1 | grep -i 'Content-Type'
echo "It does not allow bad characters:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'My-New-Header':'Hello,\n\nworld.'}\$\$" 2>&1 | grep -o -F 'BAD_ARGUMENTS'
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'My\rNew-Header':'Hello, world.'}\$\$" 2>&1 | grep -o -F 'BAD_ARGUMENTS'
echo "It does not let duplicate entries:"
${CLICKHOUSE_CURL} -sS --globoff -v "http://localhost:8123/" -d "SELECT 1 SETTINGS http_response_headers = \$\${'a':'b','a':'c'}\$\$" 2>&1 | grep -o -F 'BAD_ARGUMENTS'