add new table function: hive()

This commit is contained in:
lgbo-ustc 2022-02-28 20:51:33 +08:00 committed by liangjiabiao
parent db69ab9d17
commit 99cd25d70e
6 changed files with 34 additions and 3 deletions

View File

@ -8,8 +8,10 @@
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/status.h>
#include <arrow/filesystem/filesystem.h>
#include <orc/OrcFile.hh>
#include <orc/Reader.hh>
#include <orc/Statistics.hh>
#include <parquet/arrow/reader.h>
#include <parquet/file_reader.h>
#include <parquet/statistics.h>

View File

@ -7,8 +7,6 @@
#include <memory>
#include <boost/algorithm/string/join.hpp>
#include <orc/Statistics.hh>
#include <arrow/filesystem/filesystem.h>
#include <Core/Field.h>
#include <Core/Block.h>
@ -18,6 +16,8 @@
// Forward declarations of types from the `orc` namespace.
// NOTE(review): presumably these let this header avoid including the ORC
// headers (<orc/Reader.hh>, <orc/Statistics.hh>) directly — confirm.
namespace orc
{
class Reader;
class Statistics;
class ColumnStatistics;
}
namespace parquet
@ -36,6 +36,11 @@ namespace io
class RandomAccessFile;
}
// Forward declaration of FileSystem inside the nested `fs` namespace.
// NOTE(review): the enclosing namespace is outside this view; presumably this
// is arrow::fs::FileSystem from <arrow/filesystem/filesystem.h> — confirm.
namespace fs
{
class FileSystem;
}
class Buffer;
}

View File

@ -1,9 +1,11 @@
# Builds the clickhouse_table_functions library from the sources in this
# directory, plus the Hive table function when its dependencies are available.
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_table_functions .)

# The hive() table function links against the Hive metastore client and HDFS.
# Only compile its sources when both targets exist; referencing a missing
# `ns::target` name in target_link_libraries is a configure-time error.
# NOTE(review): assumes USE_HIVE is defined only when these targets exist, so
# registerTableFunctionHive() is never referenced without its sources — confirm.
if (TARGET ch_contrib::hivemetastore AND TARGET ch_contrib::hdfs)
    add_headers_and_sources(clickhouse_table_functions Hive)
endif ()

# These two files are excluded from this library's source/header lists.
list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp)
list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h)

add_library(clickhouse_table_functions ${clickhouse_table_functions_sources})

# Link the Hive/HDFS libraries only when they are part of the build; otherwise
# fall back to the base dependency set.
if (TARGET ch_contrib::hivemetastore AND TARGET ch_contrib::hdfs)
    target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms ch_contrib::hivemetastore ch_contrib::hdfs)
else ()
    target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms)
endif ()

View File

@ -31,6 +31,10 @@ void registerTableFunctions()
registerTableFunctionHDFSCluster(factory);
#endif
#if USE_HIVE
registerTableFunctionHive(factory);
#endif
registerTableFunctionODBC(factory);
registerTableFunctionJDBC(factory);

View File

@ -29,6 +29,10 @@ void registerTableFunctionHDFS(TableFunctionFactory & factory);
void registerTableFunctionHDFSCluster(TableFunctionFactory & factory);
#endif
#if USE_HIVE
void registerTableFunctionHive(TableFunctionFactory & factory);
#endif
void registerTableFunctionODBC(TableFunctionFactory & factory);
void registerTableFunctionJDBC(TableFunctionFactory & factory);

View File

@ -107,6 +107,20 @@ def test_parquet_groupby_with_cache(started_cluster):
2021-11-16 2
"""
assert result == expected_result
def test_parquet_groupby_by_hive_function(started_cluster):
logging.info('Start testing groupby ...')
node = started_cluster.instances['h0_0_0']
result = node.query("""
SELECT day, count(*) FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') group by day order by day
""")
expected_result = """2021-11-01 1
2021-11-05 2
2021-11-11 1
2021-11-16 2
"""
assert result == expected_result
def test_cache_read_bytes(started_cluster):
node = started_cluster.instances['h0_0_0']
node.query("set input_format_parquet_allow_missing_columns = true")