diff --git a/base/common/CMakeLists.txt b/base/common/CMakeLists.txt
index 1c3e194a54c..7328f8331d4 100644
--- a/base/common/CMakeLists.txt
+++ b/base/common/CMakeLists.txt
@@ -26,6 +26,11 @@ if (ENABLE_REPLXX)
     )
 endif ()
 
+if (USE_DEBUG_HELPERS)
+    set (INCLUDE_DEBUG_HELPERS "-include ${ClickHouse_SOURCE_DIR}/base/common/iostream_debug_helpers.h")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${INCLUDE_DEBUG_HELPERS}")
+endif ()
+
 add_library (common ${SRCS})
 
 target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake
index 6501a05d7bb..c57aa9c9212 100644
--- a/cmake/find/parquet.cmake
+++ b/cmake/find/parquet.cmake
@@ -4,7 +4,7 @@ endif()
 
 if (ENABLE_PARQUET)
 
-if (NOT OS_FREEBSD AND NOT OS_DARWIN) # Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory
+if (NOT OS_FREEBSD) # Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory
     option(USE_INTERNAL_PARQUET_LIBRARY "Set to FALSE to use system parquet library instead of bundled" ${NOT_UNBUNDLED})
 endif()
diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
index 0af6955eab0..d1a041bc293 100644
--- a/dbms/CMakeLists.txt
+++ b/dbms/CMakeLists.txt
@@ -36,6 +36,11 @@ if (NOT MSVC)
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
 endif ()
 
+if (USE_DEBUG_HELPERS)
+    set (INCLUDE_DEBUG_HELPERS "-I${ClickHouse_SOURCE_DIR}/base -include ${ClickHouse_SOURCE_DIR}/dbms/src/Core/iostream_debug_helpers.h")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${INCLUDE_DEBUG_HELPERS}")
+endif ()
+
 # Add some warnings that are not available even with -Wall -Wextra -Wpedantic.
 option (WEVERYTHING "Enables -Weverything option with some exceptions. This is intended for exploration of new compiler warnings that may be found to be useful. Only makes sense for clang." ON)
diff --git a/dbms/src/Functions/FunctionBinaryArithmetic.h b/dbms/src/Functions/FunctionBinaryArithmetic.h
index c9d03257778..67ff90807d5 100644
--- a/dbms/src/Functions/FunctionBinaryArithmetic.h
+++ b/dbms/src/Functions/FunctionBinaryArithmetic.h
@@ -992,9 +992,9 @@ public:
             return true;
         }
         return false;
-}
+    }
 
-void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
     {
         /// Special case when multiply aggregate function state
         if (isAggregateMultiply(block.getByPosition(arguments[0]).type, block.getByPosition(arguments[1]).type))
diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h
index 49c11bb5b68..7e6a4d58ace 100644
--- a/dbms/src/Functions/FunctionsComparison.h
+++ b/dbms/src/Functions/FunctionsComparison.h
@@ -24,7 +24,6 @@
 #include
-#include
 #include
 #include
@@ -37,6 +36,15 @@
 #include
 #include
 
+#if USE_EMBEDDED_COMPILER
+#include <DataTypes/Native.h>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include <llvm/IR/IRBuilder.h>
+#pragma GCC diagnostic pop
+#endif
+
 namespace DB
 {
@@ -957,26 +965,26 @@ private:
         const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count);
 
-    template <typename ComparisonFunction, typename ConvolutionFunction>
-    void executeTupleEqualityImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x, const ColumnsWithTypeAndName & y,
-        size_t tuple_size, size_t input_rows_count)
+    void executeTupleEqualityImpl(
+        std::shared_ptr<IFunctionOverloadResolver> func_compare,
+        std::shared_ptr<IFunctionOverloadResolver> func_convolution,
+        Block & block,
+        size_t result,
+        const ColumnsWithTypeAndName & x,
+        const ColumnsWithTypeAndName & y,
+        size_t tuple_size,
+        size_t input_rows_count)
     {
         if (0 == tuple_size)
             throw Exception("Comparison of zero-sized tuples is not implemented.", ErrorCodes::NOT_IMPLEMENTED);
 
-        auto func_compare = ComparisonFunction::create(context);
-        auto func_convolution = ConvolutionFunction::create(context);
-
-        auto func_compare_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(func_compare));
-        auto func_convolution_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(func_convolution));
-
         Block tmp_block;
         for (size_t i = 0; i < tuple_size; ++i)
         {
             tmp_block.insert(x[i]);
             tmp_block.insert(y[i]);
 
-            auto impl = func_compare_adaptor.build({x[i], y[i]});
+            auto impl = func_compare->build({x[i], y[i]});
 
             /// Comparison of the elements.
             tmp_block.insert({ nullptr, std::make_shared<DataTypeUInt8>(), "" });
@@ -998,34 +1006,30 @@ private:
             convolution_args[i] = i * 3 + 2;
 
         ColumnsWithTypeAndName convolution_types(convolution_args.size(), { nullptr, std::make_shared<DataTypeUInt8>(), "" });
-        auto impl = func_convolution_adaptor.build(convolution_types);
+        auto impl = func_convolution->build(convolution_types);
 
         impl->execute(tmp_block, convolution_args, tuple_size * 3, input_rows_count);
         block.getByPosition(result).column = tmp_block.getByPosition(tuple_size * 3).column;
     }
 
-    template <typename HeadComparisonFunction, typename TailComparisonFunction>
-    void executeTupleLessGreaterImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x,
-        const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count)
+    void executeTupleLessGreaterImpl(
+        std::shared_ptr<IFunctionOverloadResolver> func_compare_head,
+        std::shared_ptr<IFunctionOverloadResolver> func_compare_tail,
+        std::shared_ptr<IFunctionOverloadResolver> func_and,
+        std::shared_ptr<IFunctionOverloadResolver> func_or,
+        std::shared_ptr<IFunctionOverloadResolver> func_equals,
+        Block & block,
+        size_t result,
+        const ColumnsWithTypeAndName & x,
+        const ColumnsWithTypeAndName & y,
+        size_t tuple_size,
+        size_t input_rows_count)
     {
-        auto func_compare_head = HeadComparisonFunction::create(context);
-        auto func_compare_tail = TailComparisonFunction::create(context);
-        auto func_and = FunctionAnd::create(context);
-        auto func_or = FunctionOr::create(context);
-        auto func_equals = FunctionComparison<EqualsOp, NameEquals>::create(context);
-
-        auto func_compare_head_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(func_compare_head));
-        auto func_compare_tail_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(func_compare_tail));
-        auto func_equals_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(func_equals));
-
         ColumnsWithTypeAndName bin_args = {{ nullptr, std::make_shared<DataTypeUInt8>(), "" }, { nullptr, std::make_shared<DataTypeUInt8>(), "" }};
 
-        auto func_and_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(func_and))
-            .build(bin_args);
-
-        auto func_or_adaptor = FunctionOverloadResolverAdaptor(std::make_unique(func_or))
-            .build(bin_args);
+        auto func_and_adaptor = func_and->build(bin_args);
+        auto func_or_adaptor = func_or->build(bin_args);
 
         Block tmp_block;
@@ -1039,18 +1043,18 @@ private:
 
             if (i + 1 != tuple_size)
             {
-                auto impl_head = func_compare_head_adaptor.build({x[i], y[i]});
+                auto impl_head = func_compare_head->build({x[i], y[i]});
                 impl_head->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count);
 
                 tmp_block.insert({ nullptr, std::make_shared<DataTypeUInt8>(), "" });
 
-                auto impl_equals = func_equals_adaptor.build({x[i], y[i]});
+                auto impl_equals = func_equals->build({x[i], y[i]});
                 impl_equals->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 3, input_rows_count);
 
             }
             else
             {
-                auto impl_tail = func_compare_tail_adaptor.build({x[i], y[i]});
+                auto impl_tail = func_compare_tail->build({x[i], y[i]});
                 impl_tail->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count);
             }
         }
@@ -1157,8 +1161,7 @@ public:
 
         if (left_tuple && right_tuple)
         {
-            auto adaptor = FunctionOverloadResolverAdaptor(
-                std::make_unique(FunctionComparison<Op, Name>::create(context)));
+            auto adaptor = FunctionOverloadResolverAdaptor(std::make_unique(FunctionComparison<Op, Name>::create(context)));
 
             size_t size = left_tuple->getElements().size();
             for (size_t i = 0; i < size; ++i)
diff --git a/dbms/src/Functions/equals.cpp b/dbms/src/Functions/equals.cpp
index 9d706216fe5..69695f16218 100644
--- a/dbms/src/Functions/equals.cpp
+++ b/dbms/src/Functions/equals.cpp
@@ -18,7 +18,10 @@ void FunctionComparison<EqualsOp, NameEquals>::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x,
     const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count)
 {
-    return executeTupleEqualityImpl(block, result, x, y, tuple_size, input_rows_count);
+    return executeTupleEqualityImpl(
+        FunctionFactory::instance().get("equals", context),
+        FunctionFactory::instance().get("and", context),
+        block, result, x, y, tuple_size, input_rows_count);
 }
 
 }
diff --git a/dbms/src/Functions/greater.cpp b/dbms/src/Functions/greater.cpp
index 3a22123f391..8c5ff2670bc 100644
--- a/dbms/src/Functions/greater.cpp
+++ b/dbms/src/Functions/greater.cpp
@@ -17,7 +17,15 @@ void FunctionComparison<GreaterOp, NameGreater>::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x,
     const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count)
 {
-    return executeTupleLessGreaterImpl(block, result, x, y, tuple_size, input_rows_count);
+    auto greater = FunctionFactory::instance().get("greater", context);
+
+    return executeTupleLessGreaterImpl(
+        greater,
+        greater,
+        FunctionFactory::instance().get("and", context),
+        FunctionFactory::instance().get("or", context),
+        FunctionFactory::instance().get("equals", context),
+        block, result, x, y, tuple_size, input_rows_count);
 }
 
 }
diff --git a/dbms/src/Functions/greaterOrEquals.cpp b/dbms/src/Functions/greaterOrEquals.cpp
index 90ed2618f99..3b17cba0ee6 100644
--- a/dbms/src/Functions/greaterOrEquals.cpp
+++ b/dbms/src/Functions/greaterOrEquals.cpp
@@ -17,9 +17,13 @@ void FunctionComparison<GreaterOrEqualsOp, NameGreaterOrEquals>::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x,
     const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count)
 {
-    return executeTupleLessGreaterImpl<
-        FunctionComparison<GreaterOp, NameGreater>,
-        FunctionGreaterOrEquals>(block, result, x, y, tuple_size, input_rows_count);
+    return executeTupleLessGreaterImpl(
+        FunctionFactory::instance().get("greater", context),
+        FunctionFactory::instance().get("greaterOrEquals", context),
+        FunctionFactory::instance().get("and", context),
+        FunctionFactory::instance().get("or", context),
+        FunctionFactory::instance().get("equals", context),
+        block, result, x, y, tuple_size, input_rows_count);
 }
 
 }
diff --git a/dbms/src/Functions/less.cpp b/dbms/src/Functions/less.cpp
index 8a9d777ee34..f916bfb4ccb 100644
--- a/dbms/src/Functions/less.cpp
+++ b/dbms/src/Functions/less.cpp
@@ -17,7 +17,15 @@ void FunctionComparison<LessOp, NameLess>::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x,
     const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count)
 {
-    return executeTupleLessGreaterImpl(block, result, x, y, tuple_size, input_rows_count);
+    auto less = FunctionFactory::instance().get("less", context);
+
+    return executeTupleLessGreaterImpl(
+        less,
+        less,
+        FunctionFactory::instance().get("and", context),
+        FunctionFactory::instance().get("or", context),
+        FunctionFactory::instance().get("equals", context),
+        block, result, x, y, tuple_size, input_rows_count);
 }
 
 }
diff --git a/dbms/src/Functions/lessOrEquals.cpp b/dbms/src/Functions/lessOrEquals.cpp
index 09ef164d3ae..5e1bdbf9629 100644
--- a/dbms/src/Functions/lessOrEquals.cpp
+++ b/dbms/src/Functions/lessOrEquals.cpp
@@ -17,9 +17,13 @@ void FunctionComparison<LessOrEqualsOp, NameLessOrEquals>::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x,
     const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count)
 {
-    return executeTupleLessGreaterImpl<
-        FunctionComparison<LessOp, NameLess>,
-        FunctionLessOrEquals>(block, result, x, y, tuple_size, input_rows_count);
+    return executeTupleLessGreaterImpl(
+        FunctionFactory::instance().get("less", context),
+        FunctionFactory::instance().get("lessOrEquals", context),
+        FunctionFactory::instance().get("and", context),
+        FunctionFactory::instance().get("or", context),
+        FunctionFactory::instance().get("equals", context),
+        block, result, x, y, tuple_size, input_rows_count);
 }
 
 }
diff --git a/dbms/src/Functions/notEquals.cpp b/dbms/src/Functions/notEquals.cpp
index 41bb2fb8c82..da29dfe9a69 100644
--- a/dbms/src/Functions/notEquals.cpp
+++ b/dbms/src/Functions/notEquals.cpp
@@ -17,7 +17,10 @@ void FunctionComparison<NotEqualsOp, NameNotEquals>::executeTupleImpl(Block & block, size_t result, const ColumnsWithTypeAndName & x,
     const ColumnsWithTypeAndName & y, size_t tuple_size, size_t input_rows_count)
 {
-    return executeTupleEqualityImpl(block, result, x, y, tuple_size, input_rows_count);
+    return executeTupleEqualityImpl(
+        FunctionFactory::instance().get("notEquals", context),
+        FunctionFactory::instance().get("or", context),
+        block, result, x, y, tuple_size, input_rows_count);
 }
 
 }
diff --git a/dbms/src/Parsers/CMakeLists.txt b/dbms/src/Parsers/CMakeLists.txt
index 176be236658..086384196aa 100644
--- a/dbms/src/Parsers/CMakeLists.txt
+++ b/dbms/src/Parsers/CMakeLists.txt
@@ -4,6 +4,11 @@ add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources})
 target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io)
 target_include_directories(clickhouse_parsers PUBLIC ${DBMS_INCLUDE_DIR})
 
+if (USE_DEBUG_HELPERS)
+    set (INCLUDE_DEBUG_HELPERS "-I${ClickHouse_SOURCE_DIR}/base -include ${ClickHouse_SOURCE_DIR}/dbms/src/Parsers/iostream_debug_helpers.h")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${INCLUDE_DEBUG_HELPERS}")
+endif ()
+
 if(ENABLE_TESTS)
     add_subdirectory(tests)
 endif()
diff --git a/dbms/src/Processors/Executors/PipelineExecutor.cpp b/dbms/src/Processors/Executors/PipelineExecutor.cpp
index f4be5a84518..5957c28061e 100644
--- a/dbms/src/Processors/Executors/PipelineExecutor.cpp
+++ b/dbms/src/Processors/Executors/PipelineExecutor.cpp
@@ -658,11 +658,12 @@ void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads)
     total_time_ns = total_time_watch.elapsed();
     wait_time_ns = total_time_ns - execution_time_ns - processing_time_ns;
 
-    LOG_TRACE(log, "Thread finished."
-        << " Total time: " << (total_time_ns / 1e9) << " sec."
-        << " Execution time: " << (execution_time_ns / 1e9) << " sec."
-        << " Processing time: " << (processing_time_ns / 1e9) << " sec."
-        << " Wait time: " << (wait_time_ns / 1e9) << "sec.");
+    LOG_TRACE(log, std::fixed << std::setprecision(3)
+        << "Thread finished."
+        << " Total time: " << (total_time_ns / 1e9) << " sec."
+        << " Execution time: " << (execution_time_ns / 1e9) << " sec."
+        << " Processing time: " << (processing_time_ns / 1e9) << " sec."
+        << " Wait time: " << (wait_time_ns / 1e9) << " sec.");
 #endif
 }
 
@@ -690,14 +691,14 @@ void PipelineExecutor::executeImpl(size_t num_threads)
     bool finished_flag = false;
 
     SCOPE_EXIT(
-            if (!finished_flag)
-            {
-                finish();
+        if (!finished_flag)
+        {
+            finish();
 
-                for (auto & thread : threads)
-                    if (thread.joinable())
-                        thread.join();
-            }
+            for (auto & thread : threads)
+                if (thread.joinable())
+                    thread.join();
+        }
     );
 
     addChildlessProcessorsToStack(stack);
diff --git a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
index 4e8772704e4..0615dd2a5d1 100644
--- a/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/dbms/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@@ -260,13 +260,6 @@ namespace DB
             throw Exception{"Error while reading " + format_name + " data: " + read_status.ToString(),
                 ErrorCodes::CANNOT_READ_ALL_DATA};
 
-        if (0 == table->num_rows())
-            throw Exception{"Empty table in input data", ErrorCodes::EMPTY_DATA_PASSED};
-
-        if (header.columns() > static_cast<size_t>(table->num_columns()))
-            // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
-            throw Exception{"Number of columns is less than the table has", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH};
-
         ++row_group_current;
 
         NameToColumnPtr name_to_column_ptr;
diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 9e0d76acd1f..dbc19944a56 100644
--- a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -4,91 +4,171 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include "ArrowColumnToCHColumn.h"
+#include
+
+
+#include
+
 
 namespace DB
 {
-    ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_)
-        : IInputFormat(std::move(header_), in_)
+class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFile
+{
+public:
+    RandomAccessFileFromSeekableReadBuffer(SeekableReadBuffer& in_, off_t file_size_)
+        : in(in_)
+        , file_size(file_size_)
+        , is_closed(false)
     {
+    }
 
-    Chunk ParquetBlockInputFormat::generate()
+    virtual arrow::Status GetSize(int64_t* size) override
     {
-        Chunk res;
-        auto &header = getPort().getHeader();
+        *size = file_size;
+        return arrow::Status::OK();
+    }
 
-        if (!in.eof())
+    virtual arrow::Status Close() override
+    {
+        is_closed = true;
+        return arrow::Status::OK();
+    }
+
+    virtual arrow::Status Tell(int64_t* position) const override
+    {
+        *position = in.getPosition();
+        return arrow::Status::OK();
+    }
+
+    virtual bool closed() const override { return is_closed; }
+
+    virtual arrow::Status Read(int64_t nbytes, int64_t* bytes_read, void* out) override
+    {
+        *bytes_read = in.readBig(reinterpret_cast<char *>(out), nbytes);
+        return arrow::Status::OK();
+    }
+
+    virtual arrow::Status Read(int64_t nbytes, std::shared_ptr<arrow::Buffer>* out) override
+    {
+        std::shared_ptr<arrow::Buffer> buf;
+        ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(nbytes, &buf));
+        size_t n = in.readBig(reinterpret_cast<char *>(buf->mutable_data()), nbytes);
+        *out = arrow::SliceBuffer(buf, 0, n);
+        return arrow::Status::OK();
+    }
+
+    virtual arrow::Status Seek(int64_t position) override
+    {
+        in.seek(position, SEEK_SET);
+        return arrow::Status::OK();
+    }
+
+private:
+    SeekableReadBuffer& in;
+    off_t file_size;
+    bool is_closed;
+};
+
+
+static std::shared_ptr<arrow::io::RandomAccessFile> as_arrow_file(ReadBuffer & in)
+{
+    if (auto fd_in = dynamic_cast<ReadBufferFromFileDescriptor *>(&in))
+    {
+        struct stat stat;
+        auto res = ::fstat(fd_in->getFD(), &stat);
+        // if fd is a regular file i.e. not stdin
+        if (res == 0 && S_ISREG(stat.st_mode))
         {
-            /*
-               First we load whole stream into string (its very bad and limiting .parquet file size to half? of RAM)
-               Then producing blocks for every row_group (dont load big .parquet files with one row_group - it can eat x10+ RAM from .parquet file size)
-            */
-
-            if (row_group_current < row_group_total)
-                throw Exception{"Got new data, but data from previous chunks was not read " +
-                    std::to_string(row_group_current) + "/" + std::to_string(row_group_total),
-                    ErrorCodes::CANNOT_READ_ALL_DATA};
-
-            file_data.clear();
-            {
-                WriteBufferFromString file_buffer(file_data);
-                copyData(in, file_buffer);
-            }
-
-            buffer = std::make_unique(file_data);
-            // TODO: maybe use parquet::RandomAccessSource?
-            auto status = parquet::arrow::FileReader::Make(
-                ::arrow::default_memory_pool(),
-                parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(*buffer)),
-                &file_reader);
-
-            row_group_total = file_reader->num_row_groups();
-            row_group_current = 0;
+            return std::make_shared<RandomAccessFileFromSeekableReadBuffer>(*fd_in, stat.st_size);
         }
 
-        //DUMP(row_group_current, row_group_total);
-        if (row_group_current >= row_group_total)
-            return res;
+    }
 
-        // TODO: also catch a ParquetException thrown by filereader?
-        //arrow::Status read_status = filereader.ReadTable(&table);
-        std::shared_ptr<arrow::Table> table;
-        arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, &table);
+    // fallback to loading the entire file in memory
+    std::string file_data;
+    {
+        WriteBufferFromString file_buffer(file_data);
+        copyData(in, file_buffer);
+    }
+    return std::make_shared<arrow::io::BufferReader>(arrow::Buffer::FromString(std::move(file_data)));
+}
 
-        ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, "Parquet");
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+#define THROW_ARROW_NOT_OK(status) \
+    do \
+    { \
+        if (::arrow::Status _s = (status); !_s.ok()) \
+            throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \
+    } while (false)
+
+ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_)
+    : IInputFormat(std::move(header_), in_)
+{
+    THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(as_arrow_file(in_), arrow::default_memory_pool(), &file_reader));
+    row_group_total = file_reader->num_row_groups();
+
+    std::shared_ptr<arrow::Schema> schema;
+    THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
+
+    for (int i = 0; i < schema->num_fields(); ++i)
+    {
+        if (getPort().getHeader().has(schema->field(i)->name()))
+        {
+            column_indices.push_back(i);
+        }
+    }
+}
+
+Chunk ParquetBlockInputFormat::generate()
+{
+    Chunk res;
+    auto &header = getPort().getHeader();
+
+    if (row_group_current >= row_group_total)
         return res;
-    }
 
-    void ParquetBlockInputFormat::resetParser()
-    {
-        IInputFormat::resetParser();
+    std::shared_ptr<arrow::Table> table;
+    arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table);
+    ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, read_status, header, row_group_current, "Parquet");
+    return res;
+}
 
-        file_reader.reset();
-        file_data.clear();
-        buffer.reset();
-        row_group_total = 0;
-        row_group_current = 0;
-    }
+void ParquetBlockInputFormat::resetParser()
+{
+    IInputFormat::resetParser();
 
-    void registerInputFormatProcessorParquet(FormatFactory &factory)
-    {
-        factory.registerInputFormatProcessor(
-            "Parquet",
-            [](ReadBuffer &buf,
-               const Block &sample,
-               const RowInputFormatParams &,
-               const FormatSettings & /* settings */)
-            {
-                return std::make_shared<ParquetBlockInputFormat>(buf, sample);
-            });
-    }
+    file_reader.reset();
+    row_group_total = 0;
+    row_group_current = 0;
+}
+
+void registerInputFormatProcessorParquet(FormatFactory &factory)
+{
+    factory.registerInputFormatProcessor(
+        "Parquet",
+        [](ReadBuffer &buf,
+           const Block &sample,
+           const RowInputFormatParams &,
+           const FormatSettings & /* settings */)
+        {
+            return std::make_shared<ParquetBlockInputFormat>(buf, sample);
+        });
+}
 
 }
diff --git a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
index 665665557a5..1ed241a03cb 100644
--- a/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
+++ b/dbms/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
@@ -27,13 +27,10 @@ protected:
     Chunk generate() override;
 
 private:
-
-    // TODO: check that this class implements every part of its parent
-
     std::unique_ptr<parquet::arrow::FileReader> file_reader;
-    std::string file_data;
-    std::unique_ptr<arrow::Buffer> buffer;
     int row_group_total = 0;
+    // indices of columns to read from Parquet file
+    std::vector<int> column_indices;
    int row_group_current = 0;
 };
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 336e1930cf9..1c94db88c2a 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -19,7 +19,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h
index c5d785d263e..7bcb208980e 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 
 namespace DB
 {
diff --git a/dbms/src/Storages/MergeTree/checkDataPart.cpp b/dbms/src/Storages/MergeTree/checkDataPart.cpp
index 1039395a8ff..f88732768b9 100644
--- a/dbms/src/Storages/MergeTree/checkDataPart.cpp
+++ b/dbms/src/Storages/MergeTree/checkDataPart.cpp
@@ -7,7 +7,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/dbms/tests/performance/arithmetic.xml b/dbms/tests/performance/arithmetic.xml
new file mode 100644
index 00000000000..066173d8635
--- /dev/null
+++ b/dbms/tests/performance/arithmetic.xml
@@ -0,0 +1,78 @@
+<test>
+    <type>loop</type>
+
+    <stop_conditions>
+        <all_of>
+            <iterations>10</iterations>
+            <min_time_not_changing_for_ms>100</min_time_not_changing_for_ms>
+        </all_of>
+        <any_of>
+            <iterations>100</iterations>
+            <total_time_ms>1000</total_time_ms>
+        </any_of>
+    </stop_conditions>
+
+    <substitutions>
+        <substitution>
+            <name>arg</name>
+            <values>
+                <value>u8</value>
+                <value>u16</value>
+                <value>u32</value>
+                <value>u64</value>
+                <value>i8</value>
+                <value>i16</value>
+                <value>i32</value>
+                <value>i64</value>
+                <value>f32</value>
+                <value>f64</value>
+            </values>
+        </substitution>
+        <substitution>
+            <name>op</name>
+            <values>
+                <value>plus</value>
+                <value>minus</value>
+                <value>multiply</value>
+                <value>divide</value>
+                <value>intDivOrZero</value>
+            </values>
+        </substitution>
+    </substitutions>
+
+    <create_query>
+CREATE TABLE nums
+(
+    u8 UInt8,
+    u16 UInt16,
+    u32 UInt32,
+    u64 UInt64,
+    i8 Int8,
+    i16 Int16,
+    i32 Int32,
+    i64 Int64,
+    f32 Float32,
+    f64 Float64
+) ENGINE = Memory;
+    </create_query>
+
+    <fill_query>
+INSERT INTO nums
+WITH cityHash64(number) AS x
+SELECT
+    toUInt8(x),
+    toUInt16(x),
+    toUInt32(x),
+    toUInt64(x),
+    toInt8(x),
+    toInt16(x),
+    toInt32(x),
+    toInt64(x),
+    toFloat32(x),
+    toFloat64(x)
+FROM numbers(100000000);
+    </fill_query>
+
+    <query>SELECT count() FROM nums WHERE NOT ignore({op}({arg}, {arg}))</query>
+
+    <drop_query>DROP TABLE nums</drop_query>
+</test>
diff --git a/dbms/tests/queries/0_stateless/00900_parquet_load.reference b/dbms/tests/queries/0_stateless/00900_parquet_load.reference
index 23627bf0f68..2930fcd3c14 100644
--- a/dbms/tests/queries/0_stateless/00900_parquet_load.reference
+++ b/dbms/tests/queries/0_stateless/00900_parquet_load.reference
@@ -174,16 +174,16 @@
 Code: 8. DB::Ex---tion: Column "element" is not presented in input data
 Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: key_value: list not null> not null>> not null> not null
 === Try load data from nonnullable.impala.parquet
-Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: map: list not null> not null
+Code: 8. DB::Ex---tion: Column "element" is not presented in input data
 === Try load data from nullable.impala.parquet
-Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: map: list not null> not null
+Code: 8. DB::Ex---tion: Column "element" is not presented in input data
 === Try load data from nulls.snappy.parquet
 Code: 8. DB::Ex---tion: Column "b_c_int" is not presented in input data
 === Try load data from repeated_no_annotation.parquet
-Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: phone: list not null> not null
+Code: 8. DB::Ex---tion: Column "number" is not presented in input data
 === Try load data from userdata1.parquet
 1454486129	1	Amanda Jordan	ajordan0@com.com	Female	1.197.201.2	6759521864920116	Indonesia	3/8/1971	49756.53	Internal Auditor	1E+02