From 99cd25d70e066a6a612799c027955b9cee89ed86 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 20:51:33 +0800 Subject: [PATCH] add new table function: hive() --- src/Storages/Hive/HiveFile.cpp | 2 ++ src/Storages/Hive/HiveFile.h | 9 +++++++-- src/TableFunctions/CMakeLists.txt | 4 +++- src/TableFunctions/registerTableFunctions.cpp | 4 ++++ src/TableFunctions/registerTableFunctions.h | 4 ++++ tests/integration/test_hive_query/test.py | 14 ++++++++++++++ 6 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index b0cfa9809e1..dffcca61a9c 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #include #include #include diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 63cca2562eb..6d2ba29ba0f 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -7,8 +7,6 @@ #include #include -#include -#include #include #include @@ -18,6 +16,8 @@ namespace orc { class Reader; +class Statistics; +class ColumnStatistics; } namespace parquet @@ -36,6 +36,11 @@ namespace io class RandomAccessFile; } +namespace fs +{ + class FileSystem; +} + class Buffer; } diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index 576d1ea23ff..10f4ba8c4c8 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -1,9 +1,11 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_table_functions .) +add_headers_and_sources(clickhouse_table_functions Hive) list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp) list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h) add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) -target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) +target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms ch_contrib::hivemetastore ch_contrib::hdfs) + diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index ed08972e74d..9e09fac665a 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -31,6 +31,10 @@ void registerTableFunctions() registerTableFunctionHDFSCluster(factory); #endif +#if USE_HIVE + registerTableFunctionHive(factory); +#endif + registerTableFunctionODBC(factory); registerTableFunctionJDBC(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 72ca185f656..e39d21cb580 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -29,6 +29,10 @@ void registerTableFunctionHDFS(TableFunctionFactory & factory); void registerTableFunctionHDFSCluster(TableFunctionFactory & factory); #endif +#if USE_HIVE +void registerTableFunctionHive(TableFunctionFactory & factory); +#endif + void registerTableFunctionODBC(TableFunctionFactory & factory); void registerTableFunctionJDBC(TableFunctionFactory & factory); diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index a68ae0b066d..093dd74ca64 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -107,6 +107,20 @@ def test_parquet_groupby_with_cache(started_cluster): 2021-11-16 2 """ assert result == expected_result + +def test_parquet_groupby_by_hive_function(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT day, count(*) FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') group by day order by day + """) + expected_result = """2021-11-01 1 +2021-11-05 2 +2021-11-11 1 +2021-11-16 2 +""" + assert result == expected_result + def test_cache_read_bytes(started_cluster): node = started_cluster.instances['h0_0_0'] node.query("set input_format_parquet_allow_missing_columns = true")