Better caching of arrow build (#56657)

* Better caching of Arrow build
* Fix error
* Remove useless files
This commit is contained in:
Alexey Milovidov 2023-11-13 16:51:01 +01:00 committed by GitHub
parent 59fd09facc
commit 2ad98a58c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 103 additions and 47 deletions

View File

@ -109,7 +109,6 @@ set (ORC_CXX_HAS_CSTDINT 1)
# Set ORC_CXX_HAS_THREAD_LOCAL and pull in orc_check.cmake.
# NOTE(review): presumably the variable is consumed by the templates configured
# below — confirm against the .in files.
set(ORC_CXX_HAS_THREAD_LOCAL 1)
include(orc_check.cmake)

# Render the ORC header templates into the ORC build include directory.
configure_file(
    "${ORC_INCLUDE_DIR}/orc/orc-config.hh.in"
    "${ORC_BUILD_INCLUDE_DIR}/orc/orc-config.hh")
configure_file(
    "${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in"
    "${ORC_BUILD_INCLUDE_DIR}/Adaptor.hh")
@ -198,7 +197,9 @@ target_link_libraries(_orc PRIVATE
ch_contrib::snappy
ch_contrib::zlib
ch_contrib::zstd)
# Public (SYSTEM) include paths for _orc. Every call here uses BEFORE, so each
# call prepends to the target's include list: directories added by a later call
# are searched ahead of those added by an earlier one.
target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR})
# NOTE(review): ${ORC_INCLUDE_DIR} is listed again in the call below, next to
# the ORC include directory under contrib/arrow-cmake; confirm whether the
# single-directory call above is still needed.
target_include_directories(_orc SYSTEM BEFORE PUBLIC
${ORC_INCLUDE_DIR}
"${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src/orc/c++/include")
# Prepended after the calls above, so ORC_BUILD_INCLUDE_DIR is searched ahead
# of the directories they added.
target_include_directories(_orc SYSTEM BEFORE PUBLIC ${ORC_BUILD_INCLUDE_DIR})
target_include_directories(_orc SYSTEM PRIVATE
${ORC_SOURCE_SRC_DIR}
@ -212,8 +213,6 @@ target_include_directories(_orc SYSTEM PRIVATE
# Location of the Arrow C++ sources under contrib/.
set(LIBRARY_DIR
    "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow")

# Render Arrow's util/config.h from its template into the build tree.
configure_file(
    "${LIBRARY_DIR}/util/config.h.cmake"
    "${CMAKE_CURRENT_BINARY_DIR}/cpp/src/arrow/util/config.h")
# arrow/cpp/src/arrow/CMakeLists.txt (ARROW_SRCS + ARROW_COMPUTE + ARROW_IPC)
set(ARROW_SRCS
"${LIBRARY_DIR}/array/array_base.cc"
@ -450,7 +449,7 @@ target_link_libraries(_arrow PUBLIC _orc)
# Build-order dependency of _arrow on protoc (ordering only; this call links nothing).
add_dependencies(_arrow protoc)
# Public (SYSTEM) include paths for _arrow. BEFORE prepends, so a directory
# added by a later call is searched ahead of those added by earlier calls.
target_include_directories(_arrow SYSTEM BEFORE PUBLIC ${ARROW_SRC_DIR})
# Headers located in the build tree under cpp/src.
target_include_directories(_arrow SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/cpp/src")
# Headers located in the source tree under contrib/arrow-cmake/cpp/src.
target_include_directories(_arrow SYSTEM BEFORE PUBLIC "${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src")
# Private include paths, used only when compiling _arrow itself.
# NOTE(review): ARROW_SRC_DIR is already added as PUBLIC above; confirm this
# PRIVATE entry is still needed.
target_include_directories(_arrow SYSTEM PRIVATE ${ARROW_SRC_DIR})
target_include_directories(_arrow SYSTEM PRIVATE ${HDFS_INCLUDE_DIR})

View File

@ -0,0 +1,61 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Arrow version components.
#define ARROW_VERSION_MAJOR 11
#define ARROW_VERSION_MINOR 0
#define ARROW_VERSION_PATCH 0

// Single-integer encoding of the version:
//   (MAJOR * 1000 + MINOR) * 1000 + PATCH   (11000000 for 11.0.0).
// The whole expansion is parenthesized so the macro behaves as one operand
// inside larger expressions such as `ARROW_VERSION / 1000` or
// `ARROW_VERSION * 2`; without the outer parentheses the extra operator would
// bind only to the trailing ARROW_VERSION_PATCH term.
#define ARROW_VERSION (((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH)

// String forms of the version and of the shared-object version.
#define ARROW_VERSION_STRING "11.0.0"
#define ARROW_SO_VERSION "1100"
#define ARROW_FULL_SO_VERSION "1100.0.0"
// Build metadata strings. The compiler id and version are fixed literals here;
// the remaining fields are empty strings.
#define ARROW_CXX_COMPILER_ID "Clang"
#define ARROW_CXX_COMPILER_VERSION "ClickHouse"
#define ARROW_CXX_COMPILER_FLAGS ""
#define ARROW_BUILD_TYPE ""
#define ARROW_GIT_ID ""
#define ARROW_GIT_DESCRIPTION ""
#define ARROW_PACKAGE_KIND ""
// Optional feature switches. Every "#undef" below sits inside a block comment,
// so it has no preprocessor effect: this header neither defines nor undefines
// any of these macros.
// NOTE(review): if a component relies on one of them, presumably it is defined
// elsewhere (e.g. by the build system) — confirm.
/* #undef ARROW_COMPUTE */
/* #undef ARROW_CSV */
/* #undef ARROW_CUDA */
/* #undef ARROW_DATASET */
/* #undef ARROW_FILESYSTEM */
/* #undef ARROW_FLIGHT */
/* #undef ARROW_FLIGHT_SQL */
/* #undef ARROW_IPC */
/* #undef ARROW_JEMALLOC */
/* #undef ARROW_JEMALLOC_VENDORED */
/* #undef ARROW_JSON */
/* #undef ARROW_ORC */
/* #undef ARROW_PARQUET */
/* #undef ARROW_SUBSTRAIT */
/* #undef ARROW_GCS */
/* #undef ARROW_S3 */
/* #undef ARROW_USE_NATIVE_INT128 */
/* #undef ARROW_WITH_MUSL */
/* #undef ARROW_WITH_OPENTELEMETRY */
/* #undef ARROW_WITH_UCX */
/* #undef GRPCPP_PP_INCLUDE */

View File

@ -0,0 +1,38 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Include guard for the ORC configuration header.
#ifndef ORC_CONFIG_HH
#define ORC_CONFIG_HH
// Version string; empty in this copy of the header.
#define ORC_VERSION ""
// ORC_CXX_HAS_CSTDINT is defined unconditionally right here, so the check
// below always selects <cstdint>; the <stdint.h> branch is never taken.
#define ORC_CXX_HAS_CSTDINT
#ifdef ORC_CXX_HAS_CSTDINT
#include <cstdint>
#else
#include <stdint.h>
#endif
// The following macros should be kept for backward compatibility.
// Each one expands to the corresponding C++11 keyword or type.
// NOTE: ORC_UNIQUE_PTR expands to std::unique_ptr, but this header does not
// include <memory>; code that uses the macro has to include it itself.
#define ORC_NOEXCEPT noexcept
#define ORC_NULLPTR nullptr
#define ORC_OVERRIDE override
#define ORC_UNIQUE_PTR std::unique_ptr
#endif

View File

@ -2,9 +2,3 @@ if (TARGET ch_contrib::hivemetastore)
clickhouse_add_executable (comma_separated_streams comma_separated_streams.cpp)
target_link_libraries (comma_separated_streams PRIVATE dbms)
endif()
# Build the native_orc executable from native_orc.cpp when USE_ORC is truthy.
# It links against dbms and sees the ORC C++ headers under contrib/.
if (USE_ORC)
    clickhouse_add_executable(native_orc native_orc.cpp)
    target_link_libraries(native_orc PRIVATE dbms)
    target_include_directories(native_orc
        PRIVATE
            ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include)
endif()

View File

@ -1,36 +0,0 @@
#include <iostream>
#include <string>
#include <IO/ReadBufferFromFile.h>
#include <Processors/Formats/Impl/NativeORCBlockInputFormat.h>
#include <IO/copyData.h>
using namespace DB;
int main()
{
/// Read schema from orc file
String path = "/path/to/orc/file";
// String path = "/data1/clickhouse_official/data/user_files/bigolive_audience_stats_orc.orc";
{
ReadBufferFromFile in(path);
NativeORCSchemaReader schema_reader(in, {});
auto schema = schema_reader.readSchema();
std::cout << "schema:" << schema.toString() << std::endl;
}
/// Read schema from string with orc data
{
ReadBufferFromFile in(path);
String content;
WriteBufferFromString out(content);
copyData(in, out);
content.resize(out.count());
ReadBufferFromString in2(content);
NativeORCSchemaReader schema_reader(in2, {});
auto schema = schema_reader.readSchema();
std::cout << "schema:" << schema.toString() << std::endl;
}
return 0;
}