Merge pull request #7979 from ClickHouse/fix_check_table_with_empty_pk

Fix bug in CHECK TABLE for tables without a primary key
This commit is contained in:
alexey-milovidov 2019-12-03 02:52:28 +03:00 committed by GitHub
commit c39bcf2b51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 39 additions and 104 deletions

View File

@ -217,31 +217,25 @@ MergeTreeData::DataPart::Checksums checkDataPart(
MergeTreeData::DataPart::Checksums checksums_data;
size_t marks_in_primary_key = 0;
if (!primary_key_data_types.empty())
{
ReadBufferFromFile file_buf(path + "primary.idx");
HashingReadBuffer hashing_buf(file_buf);
if (!primary_key_data_types.empty())
{
size_t key_size = primary_key_data_types.size();
MutableColumns tmp_columns(key_size);
size_t key_size = primary_key_data_types.size();
MutableColumns tmp_columns(key_size);
for (size_t j = 0; j < key_size; ++j)
tmp_columns[j] = primary_key_data_types[j]->createColumn();
while (!hashing_buf.eof())
{
if (is_cancelled())
return {};
++marks_in_primary_key;
for (size_t j = 0; j < key_size; ++j)
tmp_columns[j] = primary_key_data_types[j]->createColumn();
while (!hashing_buf.eof())
{
if (is_cancelled())
return {};
++marks_in_primary_key;
for (size_t j = 0; j < key_size; ++j)
primary_key_data_types[j]->deserializeBinary(*tmp_columns[j].get(), hashing_buf);
}
}
else
{
hashing_buf.tryIgnore(std::numeric_limits<size_t>::max());
primary_key_data_types[j]->deserializeBinary(*tmp_columns[j].get(), hashing_buf);
}
size_t primary_idx_size = hashing_buf.count();

View File

@ -15,7 +15,7 @@ namespace DB
MergeTreeData::DataPart::Checksums checkDataPart(
MergeTreeData::DataPartPtr data_part,
bool require_checksums,
const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array.
const DataTypes & primary_key_data_types,
const MergeTreeIndices & indices = {}, /// Check skip indices
std::function<bool()> is_cancelled = []{ return false; });
@ -24,7 +24,7 @@ MergeTreeData::DataPart::Checksums checkDataPart(
const MergeTreeIndexGranularity & index_granularity,
const String & marks_file_extension,
bool require_checksums,
const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array.
const DataTypes & primary_key_data_types,
const MergeTreeIndices & indices = {}, /// Check skip indices
std::function<bool()> is_cancelled = []{ return false; });
}

View File

@ -4,9 +4,6 @@ target_link_libraries (system_numbers PRIVATE dbms clickhouse_storages_system cl
add_executable (storage_log storage_log.cpp)
target_link_libraries (storage_log PRIVATE dbms)
add_executable (part_checker part_checker.cpp)
target_link_libraries (part_checker PRIVATE dbms)
add_executable (part_name part_name.cpp)
target_link_libraries (part_name PRIVATE dbms)

View File

@ -1,80 +0,0 @@
#include <Poco/ConsoleChannel.h>
#include <Poco/DirectoryIterator.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Common/Exception.h>
using namespace DB;
/// Walks the entries of the directory `part_path` and returns the path of the first one
/// whose extension is "mrk" or "mrk2".
/// Throws DB::Exception when the directory contains no such entry.
Poco::Path getMarksFile(const std::string & part_path)
{
    Poco::DirectoryIterator dir_end;
    for (Poco::DirectoryIterator entry(part_path); entry != dir_end; ++entry)
    {
        Poco::Path candidate(entry.path());
        const auto ext = candidate.getExtension();
        if (ext == "mrk" || ext == "mrk2")
            return candidate;
    }

    throw Exception("Cannot find any mark file in directory " + part_path, DB::ErrorCodes::POCO_EXCEPTION);
}
/// Reads the marks file at `mrk_file_path` record by record until end of file and appends
/// one mark to the resulting index granularity per record.
///
/// Every record starts with two UInt64 values (offset in the compressed file and offset in
/// the decompressed block); they are read only to advance the buffer. For files with the
/// "mrk2" extension a third UInt64 follows and is used as the number of rows of the mark;
/// for any other extension `fixed_granularity` is used for every mark.
MergeTreeIndexGranularity readGranularity(const Poco::Path & mrk_file_path, size_t fixed_granularity)
{
    const bool rows_stored_in_mark = mrk_file_path.getExtension() == "mrk2";
    DB::ReadBufferFromFile marks_buf(mrk_file_path.toString());

    MergeTreeIndexGranularity granularity;
    while (!marks_buf.eof())
    {
        UInt64 compressed_offset = 0;
        UInt64 decompressed_offset = 0;
        DB::readBinary(compressed_offset, marks_buf);
        DB::readBinary(decompressed_offset, marks_buf);

        UInt64 rows_in_mark = fixed_granularity;
        if (rows_stored_in_mark)
            DB::readBinary(rows_in_mark, marks_buf);

        granularity.appendMark(rows_in_mark);
    }
    return granularity;
}
int main(int argc, char ** argv)
{
Poco::AutoPtr<Poco::ConsoleChannel> channel = new Poco::ConsoleChannel(std::cerr);
Logger::root().setChannel(channel);
Logger::root().setLevel("trace");
if (argc != 4)
{
std::cerr << "Usage: " << argv[0] << " path strict index_granularity" << std::endl;
return 1;
}
try
{
std::string full_path{argv[1]};
auto mrk_file_path = getMarksFile(full_path);
size_t fixed_granularity{parse<size_t>(argv[3])};
auto adaptive_granularity = readGranularity(mrk_file_path, fixed_granularity);
auto marks_file_extension = "." + mrk_file_path.getExtension();
bool require_checksums = parse<bool>(argv[2]);
checkDataPart(full_path, adaptive_granularity, marks_file_extension, require_checksums, {});
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
throw;
}
return 0;
}

View File

@ -0,0 +1,2 @@
all_1_1_0 1
all_0_0_0 1

View File

@ -0,0 +1,22 @@
-- Regression test: CHECK TABLE must work for MergeTree-family tables declared with
-- ORDER BY tuple(), i.e. without a primary key.
-- NOTE(review): presumably makes CHECK TABLE report one row per data part instead of a single value - confirm.
SET check_query_single_value_result = 0;
-- NOTE(review): presumably stops server log messages from being sent to the client - confirm.
SET send_logs_level = 'none';
-- Case 1: plain MergeTree table with a single inserted row.
DROP TABLE IF EXISTS mt_without_pk;
CREATE TABLE mt_without_pk (SomeField1 Int64, SomeField2 Double) ENGINE = MergeTree() ORDER BY tuple();
INSERT INTO mt_without_pk VALUES (1, 2);
CHECK TABLE mt_without_pk;
DROP TABLE IF EXISTS mt_without_pk;
-- Case 2: the same scenario for a ReplicatedMergeTree table.
DROP TABLE IF EXISTS replicated_mt_without_pk;
CREATE TABLE replicated_mt_without_pk (SomeField1 Int64, SomeField2 Double) ENGINE = ReplicatedMergeTree('/clickhouse/tables/replicated_mt_without_pk', '1') ORDER BY tuple();
INSERT INTO replicated_mt_without_pk VALUES (1, 2);
CHECK TABLE replicated_mt_without_pk;
DROP TABLE IF EXISTS replicated_mt_without_pk;