mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 10:31:57 +00:00
polymorphic parts (development)
This commit is contained in:
parent 55deeea608
commit 7dbdbff748
@@ -265,6 +265,8 @@ void DataTypeArray::deserializeBinaryBulkWithMultipleStreams(
     /// Adjust value size hint. Divide it to the average array size.
     settings.avg_value_size_hint = nested_limit ? settings.avg_value_size_hint / nested_limit * offset_values.size() : 0;
 
+    std::cerr << "nested_limit: " << nested_limit << "\n";
+
     nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state);
     settings.path.pop_back();

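The context lines above rescale a per-array size hint for the nested stream: the average array size is nested_limit / offset_values.size(), so dividing the hint by it is the same as hint / nested_limit * num_arrays. A minimal sketch of that arithmetic, with illustrative names (not ClickHouse API):

    #include <cstddef>
    #include <iostream>

    // Hypothetical helper mirroring the context line above: rescale a hint
    // measured per whole array into one usable for the flat nested elements.
    double adjustValueSizeHint(double per_array_hint, size_t nested_limit, size_t num_arrays)
    {
        if (nested_limit == 0)
            return 0.0;  // nothing to read: reset the hint, as the code does
        // Dividing by the average array size (nested_limit / num_arrays):
        return per_array_hint / nested_limit * num_arrays;
    }

    int main()
    {
        // 100 arrays holding 1000 elements total: a 40-byte-per-array hint
        // becomes a 4-byte-per-element hint.
        std::cout << adjustValueSizeHint(40.0, 1000, 100) << "\n";  // prints 4
        return 0;
    }
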
@@ -221,6 +221,8 @@ void IMergeTreeDataPartWriter::calculateAndSerializePrimaryIndex(const Block & p
     auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();
 
     /// Write index. The index contains Primary Key value for each `index_granularity` row.
+
+    std::cerr << "writing index...\n";
     for (size_t i = index_offset; i < rows;)
     {
         if (storage.hasPrimaryKey())

@@ -233,10 +235,12 @@ void IMergeTreeDataPartWriter::calculateAndSerializePrimaryIndex(const Block & p
             }
         }
 
-        ++current_mark;
-        if (current_mark < index_granularity.getMarksCount())
-            i += index_granularity.getMarkRows(current_mark);
-        else
+        std::cerr << "(index) i: " << i << "\n";
+        std::cerr << "(index) current_mark: " << current_mark << "\n";
+        std::cerr << "(index) rows in mark: " << index_granularity.getMarkRows(current_mark) << "\n";
+
+        i += index_granularity.getMarkRows(current_mark++);
+        if (current_mark >= index_granularity.getMarksCount())
             break;
     }

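The rewritten loop now skips the rows of the mark that was just processed (getMarkRows(current_mark++)) and only then checks whether a next mark exists, instead of advancing by the following mark's row count as the removed lines did. A self-contained sketch of the new iteration order, under assumed semantics:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Walk marks the way the new loop does: one index entry per visited mark,
    // then skip that mark's rows, stopping when the marks run out.
    // rows_per_mark stands in for index_granularity.getMarkRows().
    size_t countIndexEntries(const std::vector<size_t> & rows_per_mark, size_t rows)
    {
        size_t entries = 0;
        size_t current_mark = 0;
        for (size_t i = 0; i < rows;)
        {
            ++entries;  // one primary-key entry per visited mark
            i += rows_per_mark[current_mark++];
            if (current_mark >= rows_per_mark.size())
                break;
        }
        return entries;
    }

    int main()
    {
        // Three marks covering 8192, 8192 and 100 rows of a 16484-row block.
        std::cout << countIndexEntries({8192, 8192, 100}, 16484) << "\n";  // prints 3
        return 0;
    }
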
@@ -39,9 +39,6 @@ void MergeTreeDataPartWriterCompact::write(
     const Block & block, const IColumn::Permutation * permutation,
     const Block & primary_key_block, const Block & skip_indexes_block)
 {
-    if (!header)
-        header = block.cloneEmpty();
-
     /// Fill index granularity for this block
     /// if it's unknown (in case of insert data or horizontal merge,
     /// but not in case of vertical merge)

@@ -72,6 +69,9 @@ void MergeTreeDataPartWriterCompact::write(
         result_block = block;
     }
 
+    if (!header)
+        header = result_block.cloneEmpty();
+
     auto result = squashing.add(result_block.mutateColumns());
     if (!result.ready)
         return;

@@ -106,6 +106,8 @@ void MergeTreeDataPartWriterWide::write(const Block & block,
     fillIndexGranularity(block);
 
+    std::cerr << "(MergeTreeDataPartWriterWide::write) marks_count: " << index_granularity.getMarksCount() << "\n";
+    std::cerr << "(MergeTreeDataPartWriterWide::write) current_mark: " << current_mark << "\n";
 
     WrittenOffsetColumns offset_columns;
     MarkWithOffset result;

@@ -679,6 +679,12 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
     const Settings & settings,
     const ReaderSettings & reader_settings) const
 {
+    std::cerr << "marks to read: ";
+    for (const auto & part : parts)
+        for (auto range : part.ranges)
+            std::cerr << "(" << range.begin << ", " << range.end << ") ";
+
+
     /// Count marks for each part.
     std::vector<size_t> sum_marks_in_parts(parts.size());
     size_t sum_marks = 0;

@@ -62,6 +62,7 @@ void MergeTreeIndexGranularityInfo::setAdaptive(size_t index_granularity_bytes_,
 {
     is_adaptive = true;
     mark_size_in_bytes = getAdaptiveMrkSize(part_type, columns_num);
+    skip_index_mark_size_in_bytes = sizeof(MarkInCompressedFile) + sizeof(UInt64);
     marks_file_extension = getAdaptiveMrkExtension(part_type);
     index_granularity_bytes = index_granularity_bytes_;
 }

@@ -69,7 +70,7 @@ void MergeTreeIndexGranularityInfo::setAdaptive(size_t index_granularity_bytes_,
 void MergeTreeIndexGranularityInfo::setNonAdaptive()
 {
     is_adaptive = false;
-    mark_size_in_bytes = getNonAdaptiveMrkSize();
+    mark_size_in_bytes = skip_index_mark_size_in_bytes = getNonAdaptiveMrkSize();
     marks_file_extension = getNonAdaptiveMrkExtension();
     index_granularity_bytes = 0;
 }

@@ -3,6 +3,7 @@
 #include <optional>
 #include <Core/Types.h>
 #include <Storages/MergeTree/IMergeTreeDataPart_fwd.h>
+#include <DataStreams/MarkInCompressedFile.h>
 
 namespace DB
 {

@@ -17,7 +18,9 @@ public:
     String marks_file_extension;
 
     /// Size of one mark in file two or three size_t numbers
-    UInt16 mark_size_in_bytes = 0;
+    UInt32 mark_size_in_bytes = 0;
+
+    UInt8 skip_index_mark_size_in_bytes = 0;
 
     /// Is stride in rows between marks non fixed?
     bool is_adaptive = false;

@@ -53,7 +56,7 @@ private:
 };
 
 constexpr inline auto getNonAdaptiveMrkExtension() { return ".mrk"; }
-constexpr inline auto getNonAdaptiveMrkSize() { return sizeof(UInt64) * 2; }
+constexpr inline auto getNonAdaptiveMrkSize() { return sizeof(MarkInCompressedFile) * 2; }
 
 inline std::string getAdaptiveMrkExtension(MergeTreeDataPartType part_type)
 {

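Taken together, the two MergeTreeIndexGranularityInfo hunks distinguish column marks from skip-index marks: a skip-index mark stores a position in the compressed file plus a row count (sizeof(MarkInCompressedFile) + sizeof(UInt64)), matching the "two or three size_t numbers" comment above. A sketch of the implied layouts; the struct below only mirrors the two fields the seekToMark hunk further down reads, the authoritative definition is the header included above:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Assumed layout of a mark: a position in the compressed file plus an
    // offset inside the decompressed block.
    struct MarkInCompressedFile
    {
        size_t offset_in_compressed_file;
        size_t offset_in_decompressed_block;
    };

    int main()
    {
        // A plain mark is a position; a skip-index mark appends the granule's row count.
        std::cout << "column mark: " << sizeof(MarkInCompressedFile) << " bytes\n";
        std::cout << "skip-index mark: " << sizeof(MarkInCompressedFile) + sizeof(uint64_t) << " bytes\n";
        return 0;
    }
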
@@ -12,6 +12,7 @@ MergeTreeIndexReader::MergeTreeIndexReader(
     { 0, DBMS_DEFAULT_BUFFER_SIZE, false}, nullptr, nullptr,
     part_->getFileSizeOrZero(index->getFileName() + ".idx"),
     &part_->index_granularity_info,
+    MergeTreeReaderStream::ReadingMode::INDEX,
     ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE)
 {
     stream.seekToStart();

@@ -56,14 +56,17 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
     /// FIXME compute correct granularity
     std::cerr << "(MergeTreeReaderCompact::readRows) max_rows_to_read: " << max_rows_to_read << "\n";
 
-    size_t read_rows = 0;
+    std::cerr << "(MergeTreeReaderCompact::readRows) from_mark: " << from_mark << "\n";
+    std::cerr << "(MergeTreeReaderCompact::readRows) continue_reading: " << continue_reading << "\n";
+
+    if (continue_reading)
+        from_mark = next_mark;
+
+    size_t read_rows = 0;
     while (read_rows < max_rows_to_read)
     {
         size_t rows_to_read = data_part->index_granularity.getMarkRows(from_mark);
 
         std::cerr << "(MergeTreeReaderCompact::readRows) rows_to_read: " << rows_to_read << "\n";
 
         for (const auto & it : columns)
         {
             bool append = res.has(it.name);

@@ -75,11 +78,16 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
 
         try
         {
-            // size_t column_size_before_reading = column->size();
+            size_t column_size_before_reading = column->size();
             size_t column_position = data_part->getColumnPosition(it.name);
 
             readData(it.name, *it.type, *column, from_mark, column_position, rows_to_read);
 
+            size_t read_rows_in_column = column->size() - column_size_before_reading;
+            if (read_rows_in_column < rows_to_read)
+                throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) +
+                    ". Rows expected: "+ toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);
+
             /// For elements of Nested, column_size_before_reading may be greater than column size
             /// if offsets are not empty and were already read, but elements are empty.
             /// FIXME

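The check added here verifies that a full granule arrived by comparing the column's size before and after deserialization; measuring the delta, rather than the absolute size, tolerates appending into a column that already holds rows. A generic sketch of the pattern, with a caller-supplied reader standing in for readData:

    #include <stdexcept>
    #include <string>
    #include <vector>

    // Sketch: append one granule via read_fn, then verify the column grew by
    // a full granule; a short read means a corrupt part or a desynced stream.
    template <typename ReadFn>
    void readGranuleChecked(std::vector<int> & column, size_t rows_to_read, ReadFn && read_fn)
    {
        size_t size_before_reading = column.size();
        read_fn(column, rows_to_read);
        size_t read_rows_in_column = column.size() - size_before_reading;
        if (read_rows_in_column < rows_to_read)
            throw std::runtime_error(
                "Cannot read all data. Rows read: " + std::to_string(read_rows_in_column)
                + ". Rows expected: " + std::to_string(rows_to_read) + ".");
    }
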
@@ -101,9 +109,13 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
 
         ++from_mark;
         read_rows += rows_to_read;
+
+        std::cerr << "(MergeTreeReaderCompact::readRows) cur mark: " << from_mark << "\n";
+        std::cerr << "(MergeTreeReaderCompact::readRows) read_rows: " << read_rows << "\n";
+        std::cerr << "(MergeTreeReaderCompact::readRows) rows_to_read: " << rows_to_read << "\n";
     }
 
     std::cerr << "(MergeTreeReaderCompact::readRows) read_rows: " << read_rows << "\n";
+    next_mark = from_mark;
 
     return read_rows;
 }

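The next_mark bookkeeping added here (and declared in the header hunk further down) lets a continue_reading call resume at the first unread mark instead of trusting the caller's from_mark. A toy sketch of the resume logic, under assumed semantics:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Toy reader: each call consumes whole granules and remembers where it
    // stopped, so resumed calls (continue_reading == true) pick up seamlessly.
    class GranuleReader
    {
    public:
        explicit GranuleReader(std::vector<size_t> rows_per_mark)
            : rows_per_mark_(std::move(rows_per_mark)) {}

        size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read)
        {
            if (continue_reading)
                from_mark = next_mark_;  // resume after the last granule returned

            size_t read_rows = 0;
            while (read_rows < max_rows_to_read && from_mark < rows_per_mark_.size())
                read_rows += rows_per_mark_[from_mark++];

            next_mark_ = from_mark;      // remember the first unread mark
            return read_rows;
        }

    private:
        std::vector<size_t> rows_per_mark_;
        size_t next_mark_ = 0;
    };
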
@@ -118,23 +130,20 @@ void MergeTreeReaderCompact::readData(
     std::cerr << "(MergeTreeReaderCompact::readData) rows_to_read: " << rows_to_read << "\n";
     std::cerr << "(MergeTreeReaderCompact::readData) start reading column: " << name << "\n";
 
     /// FIXME seek only if needed
     seekToMark(from_mark, column_position);
 
     IDataType::DeserializeBinaryBulkSettings deserialize_settings;
     deserialize_settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return data_buffer; };
-    deserialize_settings.avg_value_size_hint = avg_value_size_hints[name];
+    // deserialize_settings.avg_value_size_hint = avg_value_size_hints[name];
     deserialize_settings.position_independent_encoding = false;
 
     IDataType::DeserializeBinaryBulkStatePtr state;
     type.deserializeBinaryBulkStatePrefix(deserialize_settings, state);
     type.deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state);
 
-    std::cerr << "(MergeTreeReaderCompact::readData) end reading column rows: " << column.size() << "\n";
-    std::cerr << "(MergeTreeReaderCompact::readData) end reading column: " << name << "\n";
-
-    // if (column.size() != rows_to_read)
-    //     throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column.size()) + ". Rows expected: "+ toString(rows_to_read) + ".",
-    //         ErrorCodes::CANNOT_READ_ALL_DATA);
+    // std::cerr << "(MergeTreeReaderCompact::readData) end reading column rows: " << column.size() << "\n";
+    // std::cerr << "(MergeTreeReaderCompact::readData) end reading column: " << name << "\n";
 }

@@ -198,7 +207,7 @@ void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index)
 {
     MarkInCompressedFile mark = marks_loader.getMark(row_index, column_index);
 
-    std::cerr << "(MergeTreeReaderCompact::seekToMark) mark: (" << mark.offset_in_compressed_file << ", " << mark.offset_in_decompressed_block << "\n";
+    // std::cerr << "(MergeTreeReaderCompact::seekToMark) mark: (" << mark.offset_in_compressed_file << ", " << mark.offset_in_decompressed_block << "\n";
 
     try
     {

@@ -33,6 +33,8 @@ private:
 
     MergeTreeMarksLoader marks_loader;
 
+    size_t next_mark = 0;
+
     void initMarksLoader();
     void seekToStart();
     void seekToMark(size_t row, size_t col);

@@ -21,10 +21,12 @@ MergeTreeReaderStream::MergeTreeReaderStream(
     MarkCache * mark_cache_,
     UncompressedCache * uncompressed_cache, size_t file_size,
     const MergeTreeIndexGranularityInfo * index_granularity_info_,
+    ReadingMode mode_,
     const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
     : path_prefix(path_prefix_), data_file_extension(data_file_extension_), marks_count(marks_count_)
     , mark_cache(mark_cache_), save_marks_in_cache(settings.save_marks_in_cache)
     , index_granularity_info(index_granularity_info_)
+    , mode(mode_)
 {
     /// Compute the size of the buffer.
     size_t max_mark_range_bytes = 0;

@@ -115,8 +117,14 @@ void MergeTreeReaderStream::initMarksLoader()
     /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache.
     auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();
 
+    std::cerr << "data_file_extension: " << data_file_extension << '\n';
+
     size_t file_size = Poco::File(mrk_path).getSize();
-    size_t expected_file_size = index_granularity_info->mark_size_in_bytes * marks_count;
+    size_t mark_size = mode == ReadingMode::INDEX
+        ? index_granularity_info->skip_index_mark_size_in_bytes
+        : index_granularity_info->mark_size_in_bytes;
+
+    size_t expected_file_size = mark_size * marks_count;
     if (expected_file_size != file_size)
         throw Exception(
             "Bad size of marks file '" + mrk_path + "': " + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size),

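With the mode-dependent mark size above, the marks-file validation reduces to one multiplication. A sketch of the check with a worked example under assumed sizes (8-byte offsets, so a two-offset mark is 16 bytes):

    #include <cstddef>
    #include <stdexcept>
    #include <string>

    // Sketch: the marks file must hold exactly marks_count fixed-size entries,
    // where the entry size depends on what kind of stream is being read.
    void checkMarksFileSize(size_t file_size, size_t mark_size, size_t marks_count, const std::string & path)
    {
        size_t expected_file_size = mark_size * marks_count;
        if (expected_file_size != file_size)
            throw std::runtime_error(
                "Bad size of marks file '" + path + "': " + std::to_string(file_size)
                + ", must be: " + std::to_string(expected_file_size));
    }

    // e.g. a column stream with 1000 marks of 16 bytes expects a 16000-byte
    // file, while an index-mode stream with 24-byte skip-index marks expects
    // 24000 bytes; any other size is rejected.
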
@@ -143,7 +151,7 @@ void MergeTreeReaderStream::initMarksLoader()
             buffer.seek(sizeof(size_t), SEEK_CUR);
             ++i;
         }
-        if (i * index_granularity_info->mark_size_in_bytes != file_size)
+        if (i * mark_size != file_size)
             throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA);
     }
     res->protect();

@@ -16,12 +16,19 @@ namespace DB
 class MergeTreeReaderStream
 {
 public:
+    enum class ReadingMode
+    {
+        COLUMN,
+        INDEX,
+    };
+
     MergeTreeReaderStream(
         const String & path_prefix_, const String & data_file_extension_, size_t marks_count_,
         const MarkRanges & all_mark_ranges,
         const ReaderSettings & settings_,
         MarkCache * mark_cache, UncompressedCache * uncompressed_cache,
         size_t file_size, const MergeTreeIndexGranularityInfo * index_granularity_info_,
+        ReadingMode mode_,
         const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type);
 
     void seekToMark(size_t index);

@@ -42,6 +49,7 @@ private:
     bool save_marks_in_cache;
 
     const MergeTreeIndexGranularityInfo * index_granularity_info;
+    ReadingMode mode;
 
     std::unique_ptr<CachedCompressedReadBuffer> cached_buffer;
     std::unique_ptr<CompressedReadBufferFromFile> non_cached_buffer;

@@ -50,9 +50,8 @@ MergeTreeReaderWide::MergeTreeReaderWide(const MergeTreeData::DataPartPtr & data
 
 size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res)
 {
-    std::cerr << "(MergeTreeReaderWide::readRows) columns: " << columns.toString() << "\n";
-    std::cerr << "(MergeTreeReaderWide::readRows) from_rows: " << from_mark << "\n";
-    std::cerr << "(MergeTreeReaderWide::readRows) block: " << res.dumpStructure() << "\n";
+    std::cerr << "(MergeTreeReaderWide::readRows) from_mark: " << from_mark << "\n";
+    std::cerr << "(MergeTreeReaderWide::readRows) continue_reading: " << continue_reading << "\n";
 
     size_t read_rows = 0;
     try

@@ -169,6 +168,7 @@ void MergeTreeReaderWide::addStreams(const String & name, const IDataType & type
         all_mark_ranges, settings, mark_cache,
         uncompressed_cache, data_part->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION),
         &data_part->index_granularity_info,
+        MergeTreeReaderStream::ReadingMode::COLUMN,
         profile_callback, clock_type));
 };

@@ -101,6 +101,11 @@ try
     bool continue_reading = (current_mark != 0);
     size_t rows_readed = reader->readRows(current_mark, continue_reading, rows_to_read, res);
 
+    std::cerr << "(MergeTreeSequentialBlockInputStream) rows_to_read: " << rows_to_read << '\n';
+    std::cerr << "(MergeTreeSequentialBlockInputStream) current_mark: " << current_mark << '\n';
+    std::cerr << "(MergeTreeSequentialBlockInputStream) rows_readed: " << rows_readed << '\n';
+
+
     if (res)
     {
         res.checkNumberOfRows();