Merge branch 'master' into kvm-clock

Sergei Trifonov 2022-09-12 17:10:08 +02:00 committed by GitHub
commit 2f041d9567
13 changed files with 129 additions and 83 deletions

View File

@ -61,11 +61,6 @@ struct StringRef
constexpr explicit operator std::string_view() const { return std::string_view(data, size); }
};
/// Here constexpr doesn't imply inline, see https://www.viva64.com/en/w/v1043/
/// nullptr can't be used because StringRef values take part in SipHash's pointer arithmetic,
/// and UBSan flags expressions like nullptr + 8 as UB.
constexpr const inline char empty_string_ref_addr{};
constexpr const inline StringRef EMPTY_STRING_REF{&empty_string_ref_addr, 0};
using StringRefs = std::vector<StringRef>;
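A minimal sketch of the problem the comment above describes, assuming a toy chunked hash loop rather than the real SipHash: the loop guard evaluates `p + 8` before any dereference, so a nullptr-backed empty value already triggers UB (and a UBSan report) on the very first bounds check.

#include <cstddef>
#include <cstdint>

/// Hedged sketch, not the real SipHash: a chunked hash whose loop guard
/// computes `p + 8` before checking bounds. With data == nullptr and
/// size == 0, evaluating `p + 8` in the first guard check is already UB,
/// even though nothing is ever dereferenced; pointing the empty value at
/// a real one-byte object keeps the arithmetic well-defined.
inline std::uint64_t toyChunkedHash(const char * data, std::size_t size)
{
    std::uint64_t h = 0;
    const char * const end = data + size;
    for (const char * p = data; p + 8 <= end; p += 8)   /// pointer arithmetic only
        h = (h * 1099511628211ULL) ^ static_cast<unsigned char>(*p);
    return h;
}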

View File

@ -24,6 +24,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT;
extern const int NOT_IMPLEMENTED;
}
@ -40,6 +41,14 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
throw Exception{"ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN};
}
StringRef ColumnNullable::getDataAt(size_t n) const
{
if (!isNullAt(n))
return getNestedColumn().getDataAt(n);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDataAt is not supported for {} when the value is NULL", getName());
}
void ColumnNullable::updateHashWithValue(size_t n, SipHash & hash) const
{
const auto & arr = getNullMapData();
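With the getDataAt change above, the NULL case throws instead of returning a sentinel, so callers are expected to check isNullAt() first. A hedged caller-side sketch of that convention, using only interfaces that appear in this diff (writeRowIfNotNull is a hypothetical helper):

/// Sketch of the new calling convention: probe for NULL before reading raw
/// bytes; calling getDataAt() on a NULL row would throw NOT_IMPLEMENTED.
static void writeRowIfNotNull(const IColumn & column, size_t row, WriteBuffer & out)
{
    if (column.isNullAt(row))
        return;                              /// NULL: nothing to write
    StringRef value = column.getDataAt(row);
    out.write(value.data, value.size);
}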

View File

@ -59,19 +59,7 @@ public:
bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); }
UInt64 get64(size_t n) const override { return nested_column->get64(n); }
bool isDefaultAt(size_t n) const override { return isNullAt(n); }
/**
* If isNullAt(n) returns false, returns the nested column's getDataAt(n), otherwise returns a special value
* EMPTY_STRING_REF indicating that data is not present.
*/
StringRef getDataAt(size_t n) const override
{
if (isNullAt(n))
return EMPTY_STRING_REF;
return getNestedColumn().getDataAt(n);
}
StringRef getDataAt(size_t) const override;
/// Will insert a null value if pos == nullptr
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;

View File

@ -136,15 +136,16 @@ public:
UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); }
/// This is strange. Please remove this method as soon as possible.
std::optional<UInt64> getOrFindValueIndex(StringRef value) const override
{
if (std::optional<UInt64> res = reverse_index.getIndex(value); res)
return res;
auto& nested = *getNestedColumn();
const IColumn & nested = *getNestedColumn();
for (size_t i = 0; i < nested.size(); ++i)
if (nested.getDataAt(i) == value)
if (!nested.isNullAt(i) && nested.getDataAt(i) == value)
return i;
return {};

View File

@ -65,30 +65,12 @@ public:
virtual size_t uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos) = 0;
/// Returns dictionary hash which is sipHash is applied to each row of nested column.
/// Returns the dictionary hash, which is SipHash applied to each row of the nested column.
virtual UInt128 getHash() const = 0;
const char * getFamilyName() const override { return "ColumnUnique"; }
TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); }
/**
* Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
* index in the DB::ColumnUnique::reverse_index hashtable.
*
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:
* - On the function call it's present. Therefore we obtain the index in O(1).
* - The reverse index is absent. We search for the index linearly.
*
* @see DB::ReverseIndex
* @see DB::ColumnUnique
*
* The most common example uses https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality/ columns.
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
* index in the dictionary, which can be used to operate with the indices column.
*/
virtual std::optional<UInt64> getOrFindValueIndex(StringRef value) const = 0;
void insert(const Field &) override
{
throw Exception("Method insert is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
@ -190,6 +172,23 @@ public:
{
throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}
/** Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
* index in the DB::ColumnUnique::reverse_index hashtable.
*
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:
* - On the function call it's present. Therefore we obtain the index in O(1).
* - The reverse index is absent. We search for the index linearly.
*
* @see DB::ReverseIndex
* @see DB::ColumnUnique
*
* The most common example uses https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality/ columns.
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
* index in the dictionary, which can be used to operate with the indices column.
*/
virtual std::optional<UInt64> getOrFindValueIndex(StringRef value) const = 0;
};
using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr;
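A hedged usage sketch of the lookup documented above (dictionaryContains is a hypothetical helper; the interfaces are the ones from this diff). Note the NULL guard: after this change, NULL values have no StringRef form, so they must be filtered out before calling getDataAt():

/// Sketch: check whether a single (non-NULL) value of `arg` occurs in the
/// dictionary. The fast path inside getOrFindValueIndex is the reverse
/// index, O(1); otherwise it falls back to a linear scan of the nested column.
static bool dictionaryContains(const IColumnUnique & dict, const IColumn & arg, size_t row)
{
    if (arg.isNullAt(row))
        return false;                        /// NULL has no StringRef representation
    return dict.getOrFindValueIndex(arg.getDataAt(row)).has_value();
}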

View File

@ -740,42 +740,34 @@ private:
const auto [null_map_data, null_map_item] = getNullMaps(arguments);
const IColumn& col_arg = *arguments[1].column.get();
if (const ColumnConst * const col_arg_const = checkAndGetColumn<ColumnConst>(col_arg))
if (const ColumnConst * col_arg_const = checkAndGetColumn<ColumnConst>(*arguments[1].column))
{
const IColumnUnique& col_lc_dict = col_lc->getDictionary();
const bool different_inner_types = col_lc_dict.isNullable()
? !col_arg_const->structureEquals(*col_lc_dict.getNestedColumn().get())
: true; // Can't compare so ignore this check
const bool use_cloned_arg = col_arg_const->isNumeric()
// outer types do not match
&& !col_arg_const->structureEquals(col_lc_dict)
// inner types do not match (like A and Nullable(B) or A and Const(B));
&& different_inner_types;
const IColumnUnique & col_lc_dict = col_lc->getDictionary();
const DataTypeArray * const array_type = checkAndGetDataType<DataTypeArray>(arguments[0].type.get());
const DataTypePtr target_type_ptr = recursiveRemoveLowCardinality(array_type->getNestedType());
const ColumnPtr col_arg_cloned = use_cloned_arg
? castColumn(arguments[1], target_type_ptr)
: col_arg_const->getPtr();
ColumnPtr col_arg_cloned = castColumn(
{col_arg_const->getDataColumnPtr(), arguments[1].type, arguments[1].name}, target_type_ptr);
const StringRef elem = col_arg_cloned->getDataAt(0);
ResultColumnPtr col_result = ResultColumnType::create();
UInt64 index = 0;
if (elem != EMPTY_STRING_REF)
if (!col_arg_cloned->isNullAt(0))
{
if (col_arg_cloned->isNullable())
col_arg_cloned = checkAndGetColumn<ColumnNullable>(*col_arg_cloned)->getNestedColumnPtr();
StringRef elem = col_arg_cloned->getDataAt(0);
if (std::optional<UInt64> maybe_index = col_lc_dict.getOrFindValueIndex(elem); maybe_index)
{
index = *maybe_index;
}
else
{
const size_t offsets_size = col_array->getOffsets().size();
auto& data = col_result->getData();
auto & data = col_result->getData();
data.resize_fill(offsets_size);
@ -786,7 +778,7 @@ private:
Impl::Main<ConcreteAction, true>::vector(
col_lc->getIndexes(),
col_array->getOffsets(),
index,
index, /** Assuming LowCardinality always stores NULL at index zero. */
col_result->getData(),
null_map_data,
null_map_item);
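A toy illustration of the assumption flagged in the comment above, not the real ColumnLowCardinality layout: when the dictionary is nullable, position 0 is reserved for NULL, so leaving the needle index at 0 for a NULL argument makes the search match NULL elements.

#include <cstddef>
#include <optional>
#include <string>
#include <vector>

/// Toy nullable dictionary: slot 0 holds NULL, real values start at slot 1.
/// With this layout, searching the indexes column for needle == 0 is the
/// same as searching for NULL, which is why `index` may stay 0 above.
struct ToyNullableDictionary
{
    std::vector<std::optional<std::string>> slots{std::nullopt};  /// slot 0 = NULL

    std::size_t insert(std::string value)
    {
        slots.push_back(std::move(value));
        return slots.size() - 1;             /// real values get positions 1, 2, ...
    }
};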
@ -800,9 +792,9 @@ private:
const NullMap * const null_map_left_casted = &left_nullable.getNullMapColumn().getData();
const IColumn& left_ptr = left_nullable.getNestedColumn();
const IColumn & left_ptr = left_nullable.getNestedColumn();
const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality();
const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality();
const ColumnNullable * const right_nullable = checkAndGetColumn<ColumnNullable>(right_casted.get());
const NullMap * const null_map_right_casted = right_nullable
@ -825,17 +817,17 @@ private:
}
else // LowCardinality(T) and U, T not Nullable
{
if (col_arg.isNullable())
if (arguments[1].column->isNullable())
return nullptr;
if (const auto* const arg_lc = checkAndGetColumn<ColumnLowCardinality>(&col_arg);
if (const auto* const arg_lc = checkAndGetColumn<ColumnLowCardinality>(arguments[1].column.get());
arg_lc && arg_lc->isNullable())
return nullptr;
// LowCardinality(T) and U (possibly LowCardinality(V))
const ColumnPtr left_casted = col_lc->convertToFullColumnIfLowCardinality();
const ColumnPtr right_casted = col_arg.convertToFullColumnIfLowCardinality();
const ColumnPtr right_casted = arguments[1].column->convertToFullColumnIfLowCardinality();
ExecutionData data =
{

View File

@ -17,8 +17,11 @@ RawBLOBRowOutputFormat::RawBLOBRowOutputFormat(
void RawBLOBRowOutputFormat::writeField(const IColumn & column, const ISerialization &, size_t row_num)
{
std::string_view value = column.getDataAt(row_num).toView();
out.write(value.data(), value.size());
if (!column.isNullAt(row_num))
{
auto value = column.getDataAt(row_num);
out.write(value.data, value.size);
}
}
@ -35,4 +38,3 @@ void registerOutputFormatRawBLOB(FormatFactory & factory)
}
}

View File

@ -1,9 +1,12 @@
#include <Storages/MergeTree/DataPartsExchange.h>
#include <Common/config.h>
#include <Formats/NativeWriter.h>
#include <Disks/SingleDiskVolume.h>
#include <Disks/createVolume.h>
#include <IO/HTTPCommon.h>
#include <IO/S3Common.h>
#include <Server/HTTP/HTMLForm.h>
#include <Server/HTTP/HTTPServerResponse.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
@ -572,11 +575,25 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart(
{
return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix, disk, *in, throttler);
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::S3_ERROR && e.code() != ErrorCodes::ZERO_COPY_REPLICATION_ERROR)
throw;
#if USE_AWS_S3
if (const auto * s3_exception = dynamic_cast<const S3Exception *>(&e))
{
/// It doesn't make sense to retry Access Denied or No Such Key
if (!s3_exception->isRetryableError())
{
tryLogCurrentException(log, fmt::format("while fetching part: {}", part_name));
throw;
}
}
#endif
LOG_WARNING(log, fmt::runtime(e.message() + " Will retry fetching part without zero-copy."));
/// It's important to release the session back to the HTTP pool. Otherwise it's possible to get a deadlock
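A hedged sketch of the retry classification used above, with a stand-in enum instead of the real AWS error type (the actual check lives in S3Exception::isRetryableError): permanent failures such as Access Denied or No Such Key are rethrown immediately, while transient ones fall through to the non-zero-copy retry.

/// Stand-in classification for illustration only (hypothetical enum).
enum class S3ErrorKind { AccessDenied, NoSuchKey, SlowDown, InternalError };

static bool isRetryable(S3ErrorKind kind)
{
    switch (kind)
    {
        case S3ErrorKind::AccessDenied:
        case S3ErrorKind::NoSuchKey:
            return false;    /// retrying cannot succeed, rethrow to the caller
        default:
            return true;     /// e.g. throttling or a transient server error
    }
}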

View File

@ -2137,13 +2137,8 @@ void MergeTreeData::dropAllData()
auto lock = lockParts();
LOG_TRACE(log, "dropAllData: removing data from memory.");
DataPartsVector all_parts(data_parts_by_info.begin(), data_parts_by_info.end());
data_parts_indexes.clear();
column_sizes.clear();
{
std::lock_guard wal_lock(write_ahead_log_mutex);
if (write_ahead_log)
@ -2155,10 +2150,33 @@ void MergeTreeData::dropAllData()
if (!getStorageID().hasUUID())
getContext()->dropCaches();
LOG_TRACE(log, "dropAllData: removing data from filesystem.");
/// Removing each data part before the recursive removal of the directory speeds up removal, because fewer syscalls are needed.
clearPartsFromFilesystem(all_parts);
NameSet part_names_failed;
try
{
LOG_TRACE(log, "dropAllData: removing data parts (count {}) from filesystem.", all_parts.size());
clearPartsFromFilesystem(all_parts, true, &part_names_failed);
LOG_TRACE(log, "dropAllData: removing all data parts from memory.");
data_parts_indexes.clear();
}
catch (...)
{
/// Remove from memory only the parts that were successfully removed from disk.
/// Parts removal can be important, and on the next try it's better to remove
/// them individually instead of relying on the recursive removal call.
LOG_WARNING(log, "dropAllData: got exception removing parts from disk, removing successfully removed parts from memory.");
for (const auto & part : all_parts)
{
if (!part_names_failed.contains(part->name))
data_parts_indexes.erase(part->info);
}
throw;
}
column_sizes.clear();
for (const auto & disk : getDisks())
{
@ -2167,6 +2185,7 @@ void MergeTreeData::dropAllData()
try
{
LOG_INFO(log, "dropAllData: removing table directory recursively to clean up garbage");
disk->removeRecursive(relative_data_path);
}
catch (const fs::filesystem_error & e)

View File

@ -822,6 +822,18 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada
} while (code == Coordination::Error::ZBADVERSION);
}
zkutil::ZooKeeperPtr StorageReplicatedMergeTree::getZooKeeperIfTableShutDown() const
{
zkutil::ZooKeeperPtr maybe_new_zookeeper;
if (zookeeper_name == default_zookeeper_name)
maybe_new_zookeeper = getContext()->getZooKeeper();
else
maybe_new_zookeeper = getContext()->getAuxiliaryZooKeeper(zookeeper_name);
maybe_new_zookeeper->sync(zookeeper_path);
return maybe_new_zookeeper;
}
void StorageReplicatedMergeTree::drop()
{
/// There is also the case when the user has configured ClickHouse to point to the wrong ZooKeeper cluster
@ -833,11 +845,7 @@ void StorageReplicatedMergeTree::drop()
{
/// Table can be shut down, restarting thread is not active
/// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice.
zkutil::ZooKeeperPtr zookeeper;
if (zookeeper_name == default_zookeeper_name)
zookeeper = getContext()->getZooKeeper();
else
zookeeper = getContext()->getAuxiliaryZooKeeper(zookeeper_name);
zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown();
/// If there may still be metadata in ZooKeeper, we don't allow dropping the table.
if (!zookeeper)
@ -7569,7 +7577,7 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedData(const IMer
if (!part.data_part_storage || !part.data_part_storage->supportZeroCopyReplication())
{
LOG_TRACE(log, "Part {} is not stored on zero-copy replicaed disk, blobs can be removed", part.name);
LOG_TRACE(log, "Part {} is not stored on zero-copy replicated disk, blobs can be removed", part.name);
return std::make_pair(true, NameSet{});
}
@ -7594,7 +7602,16 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedData(const IMer
return std::make_pair(true, NameSet{});
}
return unlockSharedDataByID(part.getUniqueId(), getTableSharedID(), part.name, replica_name, part.data_part_storage->getDiskType(), getZooKeeper(), *getSettings(), log,
/// We remove parts during table shutdown. If an exception happens, the restarting thread will
/// already be turned off and nobody will reconnect our ZooKeeper connection. In this case we use
/// the ZooKeeper connection from the context.
zkutil::ZooKeeperPtr zookeeper;
if (shutdown_called.load())
zookeeper = getZooKeeperIfTableShutDown();
else
zookeeper = getZooKeeper();
return unlockSharedDataByID(part.getUniqueId(), getTableSharedID(), part.name, replica_name, part.data_part_storage->getDiskType(), zookeeper, *getSettings(), log,
zookeeper_path);
}

View File

@ -364,6 +364,12 @@ private:
zkutil::ZooKeeperPtr tryGetZooKeeper() const;
zkutil::ZooKeeperPtr getZooKeeper() const;
/// Get connection from global context and reconnect if needed.
/// NOTE: Use it only when the table is shut down. In all other cases
/// use getZooKeeper(), because it is managed by the restarting thread,
/// which guarantees that we have only one connected object per table.
zkutil::ZooKeeperPtr getZooKeeperIfTableShutDown() const;
zkutil::ZooKeeperPtr getZooKeeperAndAssertNotReadonly() const;
void setZooKeeper();

View File

@ -0,0 +1 @@
SELECT contingency(1, [1, NULL]); -- { serverError 48 }