diff --git a/cmake/Modules/FindArrow.cmake b/cmake/Modules/FindArrow.cmake index 4043a474988..5bd111de1e3 100644 --- a/cmake/Modules/FindArrow.cmake +++ b/cmake/Modules/FindArrow.cmake @@ -17,143 +17,417 @@ # specific language governing permissions and limitations # under the License. -# - Find ARROW (arrow/api.h, libarrow.a, libarrow.so) +# - Find Arrow (arrow/api.h, libarrow.a, libarrow.so) # This module defines +# ARROW_FOUND, whether Arrow has been found +# ARROW_FULL_SO_VERSION, full shared object version of found Arrow such as "100.0.0" +# ARROW_IMPORT_LIB, path to libarrow's import library (Windows only) # ARROW_INCLUDE_DIR, directory containing headers -# ARROW_LIBS, directory containing arrow libraries -# ARROW_STATIC_LIB, path to libarrow.a +# ARROW_LIBS, deprecated. Use ARROW_LIB_DIR instead +# ARROW_LIB_DIR, directory containing Arrow libraries +# ARROW_SHARED_IMP_LIB, deprecated. Use ARROW_IMPORT_LIB instead # ARROW_SHARED_LIB, path to libarrow's shared library -# ARROW_SHARED_IMP_LIB, path to libarrow's import library (MSVC only) -# ARROW_FOUND, whether arrow has been found +# ARROW_SO_VERSION, shared object version of found Arrow such as "100" +# ARROW_STATIC_LIB, path to libarrow.a +# ARROW_VERSION, version of found Arrow +# ARROW_VERSION_MAJOR, major version of found Arrow +# ARROW_VERSION_MINOR, minor version of found Arrow +# ARROW_VERSION_PATCH, patch version of found Arrow + +if(DEFINED ARROW_FOUND) + return() +endif() include(FindPkgConfig) -include(GNUInstallDirs) +include(FindPackageHandleStandardArgs) -if ("$ENV{ARROW_HOME}" STREQUAL "") - pkg_check_modules(ARROW arrow) - if (ARROW_FOUND) - pkg_get_variable(ARROW_SO_VERSION arrow so_version) - set(ARROW_ABI_VERSION ${ARROW_SO_VERSION}) - message(STATUS "Arrow SO and ABI version: ${ARROW_SO_VERSION}") - pkg_get_variable(ARROW_FULL_SO_VERSION arrow full_so_version) - message(STATUS "Arrow full SO version: ${ARROW_FULL_SO_VERSION}") - if ("${ARROW_INCLUDE_DIRS}" STREQUAL "") - set(ARROW_INCLUDE_DIRS
"/usr/${CMAKE_INSTALL_INCLUDEDIR}") - endif() - if ("${ARROW_LIBRARY_DIRS}" STREQUAL "") - set(ARROW_LIBRARY_DIRS "/usr/${CMAKE_INSTALL_LIBDIR}") - if (EXISTS "/etc/debian_version" AND CMAKE_LIBRARY_ARCHITECTURE) - set(ARROW_LIBRARY_DIRS - "${ARROW_LIBRARY_DIRS}/${CMAKE_LIBRARY_ARCHITECTURE}") - endif() - endif() - set(ARROW_INCLUDE_DIR ${ARROW_INCLUDE_DIRS}) - set(ARROW_LIBS ${ARROW_LIBRARY_DIRS}) - set(ARROW_SEARCH_LIB_PATH ${ARROW_LIBRARY_DIRS}) - endif() -else() - set(ARROW_HOME "$ENV{ARROW_HOME}") - - set(ARROW_SEARCH_HEADER_PATHS - ${ARROW_HOME}/include - ) - - set(ARROW_SEARCH_LIB_PATH - ${ARROW_HOME}/lib - ) - - find_path(ARROW_INCLUDE_DIR arrow/array.h PATHS - ${ARROW_SEARCH_HEADER_PATHS} - # make sure we don't accidentally pick up a different version - NO_DEFAULT_PATH - ) +set(ARROW_SEARCH_LIB_PATH_SUFFIXES) +if(CMAKE_LIBRARY_ARCHITECTURE) + list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}") +endif() +list(APPEND ARROW_SEARCH_LIB_PATH_SUFFIXES + "lib64" + "lib32" + "lib" + "bin") +set(ARROW_CONFIG_SUFFIXES + "_RELEASE" + "_RELWITHDEBINFO" + "_MINSIZEREL" + "_DEBUG" + "") +if(CMAKE_BUILD_TYPE) + string(TOUPPER ${CMAKE_BUILD_TYPE} ARROW_CONFIG_SUFFIX_PREFERRED) + set(ARROW_CONFIG_SUFFIX_PREFERRED "_${ARROW_CONFIG_SUFFIX_PREFERRED}") + list(INSERT ARROW_CONFIG_SUFFIXES 0 "${ARROW_CONFIG_SUFFIX_PREFERRED}") endif() -find_library(ARROW_LIB_PATH NAMES arrow - PATHS - ${ARROW_SEARCH_LIB_PATH} - NO_DEFAULT_PATH) -get_filename_component(ARROW_LIBS ${ARROW_LIB_PATH} DIRECTORY) - -find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python - PATHS - ${ARROW_SEARCH_LIB_PATH} - NO_DEFAULT_PATH) -get_filename_component(ARROW_PYTHON_LIBS ${ARROW_PYTHON_LIB_PATH} DIRECTORY) - -if (MSVC) - SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll") - - if (MSVC AND NOT DEFINED ARROW_MSVC_STATIC_LIB_SUFFIX) +if(NOT DEFINED ARROW_MSVC_STATIC_LIB_SUFFIX) + if(MSVC) set(ARROW_MSVC_STATIC_LIB_SUFFIX "_static") - endif() - - find_library(ARROW_SHARED_LIBRARIES NAMES
arrow - PATHS ${ARROW_HOME} NO_DEFAULT_PATH - PATH_SUFFIXES "bin" ) - - find_library(ARROW_PYTHON_SHARED_LIBRARIES NAMES arrow_python - PATHS ${ARROW_HOME} NO_DEFAULT_PATH - PATH_SUFFIXES "bin" ) - get_filename_component(ARROW_SHARED_LIBS ${ARROW_SHARED_LIBRARIES} PATH ) - get_filename_component(ARROW_PYTHON_SHARED_LIBS ${ARROW_PYTHON_SHARED_LIBRARIES} PATH ) -endif () - -if (ARROW_INCLUDE_DIR AND ARROW_LIBS) - set(ARROW_FOUND TRUE) - set(ARROW_LIB_NAME arrow) - set(ARROW_PYTHON_LIB_NAME arrow_python) - if (MSVC) - set(ARROW_STATIC_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIBS}/${ARROW_PYTHON_LIB_NAME}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(ARROW_SHARED_LIB ${ARROW_SHARED_LIBS}/${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(ARROW_PYTHON_SHARED_LIB ${ARROW_PYTHON_SHARED_LIBS}/${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(ARROW_SHARED_IMP_LIB ${ARROW_LIBS}/${ARROW_LIB_NAME}.lib) - set(ARROW_PYTHON_SHARED_IMP_LIB ${ARROW_PYTHON_LIBS}/${ARROW_PYTHON_LIB_NAME}.lib) else() - set(ARROW_STATIC_LIB ${ARROW_LIBS}/lib${ARROW_LIB_NAME}.a) - set(ARROW_PYTHON_STATIC_LIB ${ARROW_LIBS}/lib${ARROW_PYTHON_LIB_NAME}.a) - - set(ARROW_SHARED_LIB ${ARROW_LIBS}/lib${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/lib${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_MSVC_STATIC_LIB_SUFFIX "") endif() endif() -if (ARROW_FOUND) - if (NOT Arrow_FIND_QUIETLY) - message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}") - message(STATUS "Found the Arrow Python library: ${ARROW_PYTHON_LIB_PATH}") - endif () -else () - if (NOT Arrow_FIND_QUIETLY) - set(ARROW_ERR_MSG "Could not find the Arrow library.
Looked for headers") - set(ARROW_ERR_MSG "${ARROW_ERR_MSG} in ${ARROW_SEARCH_HEADER_PATHS}, and for libs") - set(ARROW_ERR_MSG "${ARROW_ERR_MSG} in ${ARROW_SEARCH_LIB_PATH}") - if (Arrow_FIND_REQUIRED) - message(FATAL_ERROR "${ARROW_ERR_MSG}") - else (Arrow_FIND_REQUIRED) - message(STATUS "${ARROW_ERR_MSG}") - endif (Arrow_FIND_REQUIRED) - endif () - set(ARROW_FOUND FALSE) -endif () +# Internal function. +# +# Set shared library name for ${base_name} to ${output_variable}. +# +# Example: +# arrow_build_shared_library_name(ARROW_SHARED_LIBRARY_NAME arrow) +# # -> ARROW_SHARED_LIBRARY_NAME=libarrow.so on Linux +# # -> ARROW_SHARED_LIBRARY_NAME=libarrow.dylib on macOS +# # -> ARROW_SHARED_LIBRARY_NAME=arrow.dll with MSVC on Windows +# # -> ARROW_SHARED_LIBRARY_NAME=libarrow.dll with MinGW on Windows +function(arrow_build_shared_library_name output_variable base_name) + set(${output_variable} + "${CMAKE_SHARED_LIBRARY_PREFIX}${base_name}${CMAKE_SHARED_LIBRARY_SUFFIX}" + PARENT_SCOPE) +endfunction() -if (MSVC) - mark_as_advanced( - ARROW_INCLUDE_DIR - ARROW_STATIC_LIB - ARROW_SHARED_LIB - ARROW_SHARED_IMP_LIB - ARROW_PYTHON_STATIC_LIB - ARROW_PYTHON_SHARED_LIB - ARROW_PYTHON_SHARED_IMP_LIB - ) -else() - mark_as_advanced( - ARROW_INCLUDE_DIR - ARROW_STATIC_LIB - ARROW_SHARED_LIB - ARROW_PYTHON_STATIC_LIB - ARROW_PYTHON_SHARED_LIB - ) +# Internal function. +# +# Set import library name for ${base_name} to ${output_variable}. +# This is useful only for Windows builds. Import libraries are used only +# with MSVC (.lib) and MinGW (.dll.a) builds.
+# +# Example: +# arrow_build_import_library_name(ARROW_IMPORT_LIBRARY_NAME arrow) +# # -> ARROW_IMPORT_LIBRARY_NAME=arrow on Linux (meaningless) +# # -> ARROW_IMPORT_LIBRARY_NAME=arrow on macOS (meaningless) +# # -> ARROW_IMPORT_LIBRARY_NAME=arrow.lib with MSVC on Windows +# # -> ARROW_IMPORT_LIBRARY_NAME=libarrow.dll.a with MinGW on Windows +function(arrow_build_import_library_name output_variable base_name) + set(${output_variable} + "${CMAKE_IMPORT_LIBRARY_PREFIX}${base_name}${CMAKE_IMPORT_LIBRARY_SUFFIX}" + PARENT_SCOPE) +endfunction() + +# Internal function. +# +# Set static library name for ${base_name} to ${output_variable}. +# +# Example: +# arrow_build_static_library_name(ARROW_STATIC_LIBRARY_NAME arrow) +# # -> ARROW_STATIC_LIBRARY_NAME=libarrow.a on Linux +# # -> ARROW_STATIC_LIBRARY_NAME=libarrow.a on macOS +# # -> ARROW_STATIC_LIBRARY_NAME=arrow_static.lib with MSVC on Windows (default suffix) +# # -> ARROW_STATIC_LIBRARY_NAME=libarrow.a with MinGW on Windows +function(arrow_build_static_library_name output_variable base_name) + set( + ${output_variable} + "${CMAKE_STATIC_LIBRARY_PREFIX}${base_name}${ARROW_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) +endfunction() + +# Internal function. +# +# Set macro value for ${macro_name} in ${header_content} to ${output_variable}. +# +# Example: +# arrow_extract_macro_value(version_major +# "ARROW_VERSION_MAJOR" +# "#define ARROW_VERSION_MAJOR 1.0.0") +# # -> version_major=1.0.0 +function(arrow_extract_macro_value output_variable macro_name header_content) + string(REGEX MATCH "#define +${macro_name} +[^\r\n]+" macro_definition + "${header_content}") + string(REGEX + REPLACE "^#define +${macro_name} +(.+)$" "\\1" macro_value "${macro_definition}") + set(${output_variable} "${macro_value}" PARENT_SCOPE) +endfunction() + +# Internal macro only for arrow_find_package. +# +# Find package in HOME.
+macro(arrow_find_package_home) + find_path(${prefix}_include_dir "${header_path}" + PATHS "${home}" + PATH_SUFFIXES "include" + NO_DEFAULT_PATH) + set(include_dir "${${prefix}_include_dir}") + set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) + + if(MSVC) + set(CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL ${CMAKE_FIND_LIBRARY_SUFFIXES}) + # Temporarily add .dll: find_library doesn't find it with MSVC because .dll isn't + # in CMAKE_FIND_LIBRARY_SUFFIXES. The saved list is restored after the search. + list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}") + endif() + find_library(${prefix}_shared_lib + NAMES "${shared_lib_name}" + PATHS "${home}" + PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} + NO_DEFAULT_PATH) + if(MSVC) + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_ORIGINAL}) + endif() + set(shared_lib "${${prefix}_shared_lib}") + set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE) + if(shared_lib) + add_library(${target_shared} SHARED IMPORTED) + set_target_properties(${target_shared} PROPERTIES IMPORTED_LOCATION "${shared_lib}") + if(include_dir) + set_target_properties(${target_shared} + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}") + endif() + find_library(${prefix}_import_lib + NAMES "${import_lib_name}" + PATHS "${home}" + PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} + NO_DEFAULT_PATH) + set(import_lib "${${prefix}_import_lib}") + set(${prefix}_IMPORT_LIB "${import_lib}" PARENT_SCOPE) + if(import_lib) + set_target_properties(${target_shared} PROPERTIES IMPORTED_IMPLIB "${import_lib}") + endif() + endif() + + find_library(${prefix}_static_lib + NAMES "${static_lib_name}" + PATHS "${home}" + PATH_SUFFIXES ${ARROW_SEARCH_LIB_PATH_SUFFIXES} + NO_DEFAULT_PATH) + set(static_lib "${${prefix}_static_lib}") + set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) + if(static_lib) + add_library(${target_static} STATIC IMPORTED) + set_target_properties(${target_static} PROPERTIES IMPORTED_LOCATION "${static_lib}") + if(include_dir) +
set_target_properties(${target_static} + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}") + endif() + endif() +endmacro() + +# Internal macro only for arrow_find_package. +# +# Find package by CMake package configuration. +macro(arrow_find_package_cmake_package_configuration) + find_package(${cmake_package_name} CONFIG) + if(${cmake_package_name}_FOUND) + set(${prefix}_USE_CMAKE_PACKAGE_CONFIG TRUE PARENT_SCOPE) + if(TARGET ${target_shared}) + foreach(suffix IN LISTS ARROW_CONFIG_SUFFIXES) + get_target_property(shared_lib ${target_shared} IMPORTED_LOCATION${suffix}) + if(shared_lib) + # Remove shared library version: + # libarrow.so.100.0.0 -> libarrow.so + # Because ARROW_HOME and pkg-config approaches don't add + # shared library version. + string(REGEX + REPLACE "(${CMAKE_SHARED_LIBRARY_SUFFIX})[.0-9]+$" "\\1" shared_lib + "${shared_lib}") + set(${prefix}_SHARED_LIB "${shared_lib}" PARENT_SCOPE) + break() + endif() + endforeach() + endif() + if(TARGET ${target_static}) + foreach(suffix IN LISTS ARROW_CONFIG_SUFFIXES) + get_target_property(static_lib ${target_static} IMPORTED_LOCATION${suffix}) + if(static_lib) + set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) + break() + endif() + endforeach() + endif() + endif() +endmacro() + +# Internal macro only for arrow_find_package. +# +# Find package by pkg-config. +macro(arrow_find_package_pkg_config) + pkg_check_modules(${prefix}_PC ${pkg_config_name}) + if(${prefix}_PC_FOUND) + set(${prefix}_USE_PKG_CONFIG TRUE PARENT_SCOPE) + + set(include_dir "${${prefix}_PC_INCLUDEDIR}") + set(lib_dir "${${prefix}_PC_LIBDIR}") + set(shared_lib_paths "${${prefix}_PC_LINK_LIBRARIES}") + # Use the first shared library path as the IMPORTED_LOCATION + # for ${target_shared}. This assumes that the first shared library + # path is the shared library path for this module. + list(GET shared_lib_paths 0 first_shared_lib_path) + # Use the rest shared library paths as the INTERFACE_LINK_LIBRARIES + # for ${target_shared}.
This assumes that the rest shared library + # paths are dependency library paths for this module. + list(LENGTH shared_lib_paths n_shared_lib_paths) + if(n_shared_lib_paths LESS_EQUAL 1) + set(rest_shared_lib_paths) + else() + list(SUBLIST + shared_lib_paths + 1 + -1 + rest_shared_lib_paths) + endif() + + set(${prefix}_VERSION "${${prefix}_PC_VERSION}" PARENT_SCOPE) + set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) + set(${prefix}_SHARED_LIB "${first_shared_lib_path}" PARENT_SCOPE) + + add_library(${target_shared} SHARED IMPORTED) + set_target_properties(${target_shared} + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${include_dir}" + INTERFACE_LINK_LIBRARIES + "${rest_shared_lib_paths}" + IMPORTED_LOCATION + "${first_shared_lib_path}") + get_target_property(shared_lib ${target_shared} IMPORTED_LOCATION) + + find_library(${prefix}_static_lib + NAMES "${static_lib_name}" + PATHS "${lib_dir}" + NO_DEFAULT_PATH) + set(static_lib "${${prefix}_static_lib}") + set(${prefix}_STATIC_LIB "${static_lib}" PARENT_SCOPE) + if(static_lib) + add_library(${target_static} STATIC IMPORTED) + set_target_properties(${target_static} + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${include_dir}" + IMPORTED_LOCATION "${static_lib}") + endif() + endif() +endmacro() + +function(arrow_find_package + prefix + home + base_name + header_path + cmake_package_name + pkg_config_name) + arrow_build_shared_library_name(shared_lib_name ${base_name}) + arrow_build_import_library_name(import_lib_name ${base_name}) + arrow_build_static_library_name(static_lib_name ${base_name}) + + set(target_shared ${base_name}_shared) + set(target_static ${base_name}_static) + + if(home) + arrow_find_package_home() + set(${prefix}_FIND_APPROACH "HOME: ${home}" PARENT_SCOPE) + else() + arrow_find_package_cmake_package_configuration() + if(${cmake_package_name}_FOUND) + set(${prefix}_FIND_APPROACH + "CMake package configuration: ${cmake_package_name}" + PARENT_SCOPE) + else() + arrow_find_package_pkg_config() +
set(${prefix}_FIND_APPROACH "pkg-config: ${pkg_config_name}" PARENT_SCOPE) + endif() + endif() + + if(NOT include_dir) + if(TARGET ${target_shared}) + get_target_property(include_dir ${target_shared} INTERFACE_INCLUDE_DIRECTORIES) + elseif(TARGET ${target_static}) + get_target_property(include_dir ${target_static} INTERFACE_INCLUDE_DIRECTORIES) + endif() + endif() + if(include_dir) + set(${prefix}_INCLUDE_DIR "${include_dir}" PARENT_SCOPE) + endif() + + if(shared_lib) + get_filename_component(lib_dir "${shared_lib}" DIRECTORY) + elseif(static_lib) + get_filename_component(lib_dir "${static_lib}" DIRECTORY) + else() + set(lib_dir NOTFOUND) + endif() + set(${prefix}_LIB_DIR "${lib_dir}" PARENT_SCOPE) + # For backward compatibility + set(${prefix}_LIBS "${lib_dir}" PARENT_SCOPE) +endfunction() + +if(NOT "$ENV{ARROW_HOME}" STREQUAL "") + file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME) +endif() +arrow_find_package(ARROW + "${ARROW_HOME}" + arrow + arrow/api.h + Arrow + arrow) + +if(ARROW_HOME) + if(ARROW_INCLUDE_DIR AND EXISTS "${ARROW_INCLUDE_DIR}/arrow/util/config.h") + file(READ "${ARROW_INCLUDE_DIR}/arrow/util/config.h" ARROW_CONFIG_H_CONTENT) + arrow_extract_macro_value(ARROW_VERSION_MAJOR "ARROW_VERSION_MAJOR" + "${ARROW_CONFIG_H_CONTENT}") + arrow_extract_macro_value(ARROW_VERSION_MINOR "ARROW_VERSION_MINOR" + "${ARROW_CONFIG_H_CONTENT}") + arrow_extract_macro_value(ARROW_VERSION_PATCH "ARROW_VERSION_PATCH" + "${ARROW_CONFIG_H_CONTENT}") + if("${ARROW_VERSION_MAJOR}" STREQUAL "" + OR "${ARROW_VERSION_MINOR}" STREQUAL "" + OR "${ARROW_VERSION_PATCH}" STREQUAL "") + set(ARROW_VERSION "0.0.0") + else() + set(ARROW_VERSION + "${ARROW_VERSION_MAJOR}.${ARROW_VERSION_MINOR}.${ARROW_VERSION_PATCH}") + endif() + + arrow_extract_macro_value(ARROW_SO_VERSION_QUOTED "ARROW_SO_VERSION" + "${ARROW_CONFIG_H_CONTENT}") + string(REGEX REPLACE "^\"(.+)\"$" "\\1" ARROW_SO_VERSION "${ARROW_SO_VERSION_QUOTED}") + arrow_extract_macro_value(ARROW_FULL_SO_VERSION_QUOTED "ARROW_FULL_SO_VERSION" + "${ARROW_CONFIG_H_CONTENT}") +
string(REGEX + REPLACE "^\"(.+)\"$" "\\1" ARROW_FULL_SO_VERSION + "${ARROW_FULL_SO_VERSION_QUOTED}") + endif() +else() + if(ARROW_USE_CMAKE_PACKAGE_CONFIG) + find_package(Arrow CONFIG) + elseif(ARROW_USE_PKG_CONFIG) + pkg_get_variable(ARROW_SO_VERSION arrow so_version) + pkg_get_variable(ARROW_FULL_SO_VERSION arrow full_so_version) + endif() +endif() + +set(ARROW_ABI_VERSION ${ARROW_SO_VERSION}) + +mark_as_advanced(ARROW_ABI_VERSION + ARROW_CONFIG_SUFFIXES + ARROW_FULL_SO_VERSION + ARROW_IMPORT_LIB + ARROW_INCLUDE_DIR + ARROW_LIBS + ARROW_LIB_DIR + ARROW_SEARCH_LIB_PATH_SUFFIXES + ARROW_SHARED_IMP_LIB + ARROW_SHARED_LIB + ARROW_SO_VERSION + ARROW_STATIC_LIB + ARROW_VERSION + ARROW_VERSION_MAJOR + ARROW_VERSION_MINOR + ARROW_VERSION_PATCH) + +find_package_handle_standard_args(Arrow REQUIRED_VARS + # The first required variable is shown + # in the found message. So this list is + # not sorted alphabetically. + ARROW_INCLUDE_DIR + ARROW_LIB_DIR + ARROW_FULL_SO_VERSION + ARROW_SO_VERSION + VERSION_VAR + ARROW_VERSION) +set(ARROW_FOUND ${Arrow_FOUND}) + +if(Arrow_FOUND AND NOT Arrow_FIND_QUIETLY) + message(STATUS "Arrow version: ${ARROW_VERSION} (${ARROW_FIND_APPROACH})") + message(STATUS "Arrow SO and ABI version: ${ARROW_SO_VERSION}") + message(STATUS "Arrow full SO version: ${ARROW_FULL_SO_VERSION}") + message(STATUS "Found the Arrow core shared library: ${ARROW_SHARED_LIB}") + message(STATUS "Found the Arrow core import library: ${ARROW_IMPORT_LIB}") + message(STATUS "Found the Arrow core static library: ${ARROW_STATIC_LIB}") endif() diff --git a/cmake/Modules/FindParquet.cmake b/cmake/Modules/FindParquet.cmake index ab9c31efe2d..654020c0b87 100644 --- a/cmake/Modules/FindParquet.cmake +++ b/cmake/Modules/FindParquet.cmake @@ -17,131 +17,116 @@ # specific language governing permissions and limitations # under the License.
-# - Find PARQUET (parquet/parquet.h, libparquet.a, libparquet.so) +# - Find Parquet (parquet/api/reader.h, libparquet.a, libparquet.so) +# +# This module requires Arrow from which it uses +# arrow_find_package() +# # This module defines +# PARQUET_FOUND, whether Parquet has been found +# PARQUET_IMPORT_LIB, path to libparquet's import library (Windows only) # PARQUET_INCLUDE_DIR, directory containing headers -# PARQUET_LIBS, directory containing parquet libraries -# PARQUET_STATIC_LIB, path to libparquet.a +# PARQUET_LIBS, deprecated. Use PARQUET_LIB_DIR instead +# PARQUET_LIB_DIR, directory containing Parquet libraries +# PARQUET_SHARED_IMP_LIB, deprecated. Use PARQUET_IMPORT_LIB instead # PARQUET_SHARED_LIB, path to libparquet's shared library -# PARQUET_SHARED_IMP_LIB, path to libparquet's import library (MSVC only) -# PARQUET_FOUND, whether parquet has been found +# PARQUET_SO_VERSION, shared object version of found Parquet such as "100" +# PARQUET_STATIC_LIB, path to libparquet.a -include(FindPkgConfig) +if(DEFINED PARQUET_FOUND) + return() +endif() + +set(find_package_arguments) +if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION) + list(APPEND find_package_arguments "${${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION}") +endif() +if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) + list(APPEND find_package_arguments REQUIRED) +endif() +if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) + list(APPEND find_package_arguments QUIET) +endif() +find_package(Arrow ${find_package_arguments}) if(NOT "$ENV{PARQUET_HOME}" STREQUAL "") - set(PARQUET_HOME "$ENV{PARQUET_HOME}") + file(TO_CMAKE_PATH "$ENV{PARQUET_HOME}" PARQUET_HOME) endif() -if (MSVC) - SET(CMAKE_FIND_LIBRARY_SUFFIXES ".lib" ".dll") +if((NOT PARQUET_HOME) AND ARROW_HOME) + set(PARQUET_HOME ${ARROW_HOME}) +endif() - if (MSVC AND NOT DEFINED PARQUET_MSVC_STATIC_LIB_SUFFIX) - set(PARQUET_MSVC_STATIC_LIB_SUFFIX "_static") - endif() - - find_library(PARQUET_SHARED_LIBRARIES NAMES parquet - PATHS ${PARQUET_HOME} NO_DEFAULT_PATH -
PATH_SUFFIXES "bin" ) - - get_filename_component(PARQUET_SHARED_LIBS ${PARQUET_SHARED_LIBRARIES} PATH ) -endif () - -if(PARQUET_HOME) - set(PARQUET_SEARCH_HEADER_PATHS - ${PARQUET_HOME}/include - ) - set(PARQUET_SEARCH_LIB_PATH - ${PARQUET_HOME}/lib - ) - find_path(PARQUET_INCLUDE_DIR parquet/api/reader.h PATHS - ${PARQUET_SEARCH_HEADER_PATHS} - # make sure we don't accidentally pick up a different version - NO_DEFAULT_PATH - ) - find_library(PARQUET_LIBRARIES NAMES parquet - PATHS ${PARQUET_HOME} NO_DEFAULT_PATH - PATH_SUFFIXES "lib") - get_filename_component(PARQUET_LIBS ${PARQUET_LIBRARIES} PATH ) - - # Try to autodiscover the Parquet ABI version - get_filename_component(PARQUET_LIB_REALPATH ${PARQUET_LIBRARIES} REALPATH) - get_filename_component(PARQUET_EXT_REALPATH ${PARQUET_LIB_REALPATH} EXT) - string(REGEX MATCH ".([0-9]+.[0-9]+.[0-9]+)" HAS_ABI_VERSION ${PARQUET_EXT_REALPATH}) - if (HAS_ABI_VERSION) - if (APPLE) - string(REGEX REPLACE ".([0-9]+.[0-9]+.[0-9]+).dylib" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH}) +if(ARROW_FOUND) + arrow_find_package(PARQUET + "${PARQUET_HOME}" + parquet + parquet/api/reader.h + Parquet + parquet) + if(PARQUET_HOME) + if(PARQUET_INCLUDE_DIR AND EXISTS "${PARQUET_INCLUDE_DIR}/parquet/parquet_version.h") + file(READ "${PARQUET_INCLUDE_DIR}/parquet/parquet_version.h" + PARQUET_VERSION_H_CONTENT) + arrow_extract_macro_value(PARQUET_VERSION_MAJOR "PARQUET_VERSION_MAJOR" + "${PARQUET_VERSION_H_CONTENT}") + arrow_extract_macro_value(PARQUET_VERSION_MINOR "PARQUET_VERSION_MINOR" + "${PARQUET_VERSION_H_CONTENT}") + arrow_extract_macro_value(PARQUET_VERSION_PATCH "PARQUET_VERSION_PATCH" + "${PARQUET_VERSION_H_CONTENT}") + if("${PARQUET_VERSION_MAJOR}" STREQUAL "" + OR "${PARQUET_VERSION_MINOR}" STREQUAL "" + OR "${PARQUET_VERSION_PATCH}" STREQUAL "") + set(PARQUET_VERSION "0.0.0") else() - string(REGEX REPLACE ".so.([0-9]+.[0-9]+.[0-9]+)" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH}) + set(PARQUET_VERSION + "${PARQUET_VERSION_MAJOR}.${PARQUET_VERSION_MINOR}.${PARQUET_VERSION_PATCH}")
endif() - string(REGEX REPLACE "([0-9]+).[0-9]+.[0-9]+" "\\1" PARQUET_SO_VERSION ${PARQUET_ABI_VERSION}) - else() - set(PARQUET_ABI_VERSION "1.0.0") - set(PARQUET_SO_VERSION "1") + + arrow_extract_macro_value(PARQUET_SO_VERSION_QUOTED "PARQUET_SO_VERSION" + "${PARQUET_VERSION_H_CONTENT}") + string(REGEX + REPLACE "^\"(.+)\"$" "\\1" PARQUET_SO_VERSION "${PARQUET_SO_VERSION_QUOTED}") + arrow_extract_macro_value(PARQUET_FULL_SO_VERSION_QUOTED "PARQUET_FULL_SO_VERSION" + "${PARQUET_VERSION_H_CONTENT}") + string(REGEX + REPLACE "^\"(.+)\"$" "\\1" PARQUET_FULL_SO_VERSION + "${PARQUET_FULL_SO_VERSION_QUOTED}") endif() -else() - pkg_check_modules(PARQUET parquet) - if (PARQUET_FOUND) - pkg_get_variable(PARQUET_ABI_VERSION parquet abi_version) - message(STATUS "Parquet C++ ABI version: ${PARQUET_ABI_VERSION}") - pkg_get_variable(PARQUET_SO_VERSION parquet so_version) - message(STATUS "Parquet C++ SO version: ${PARQUET_SO_VERSION}") - set(PARQUET_INCLUDE_DIR ${PARQUET_INCLUDE_DIRS}) - set(PARQUET_LIBS ${PARQUET_LIBRARY_DIRS}) - set(PARQUET_SEARCH_LIB_PATH ${PARQUET_LIBRARY_DIRS}) - message(STATUS "Searching for parquet libs in: ${PARQUET_SEARCH_LIB_PATH}") - find_library(PARQUET_LIBRARIES NAMES parquet - PATHS ${PARQUET_SEARCH_LIB_PATH} NO_DEFAULT_PATH) - else() - find_path(PARQUET_INCLUDE_DIR NAMES parquet/api/reader.h ) - find_library(PARQUET_LIBRARIES NAMES parquet) - get_filename_component(PARQUET_LIBS ${PARQUET_LIBRARIES} PATH ) + else() + if(PARQUET_USE_CMAKE_PACKAGE_CONFIG) + find_package(Parquet CONFIG) + elseif(PARQUET_USE_PKG_CONFIG) + pkg_get_variable(PARQUET_SO_VERSION parquet so_version) + pkg_get_variable(PARQUET_FULL_SO_VERSION parquet full_so_version) endif() + endif() + set(PARQUET_ABI_VERSION "${PARQUET_SO_VERSION}") endif() -if (PARQUET_INCLUDE_DIR AND PARQUET_LIBRARIES) - set(PARQUET_FOUND TRUE) - set(PARQUET_LIB_NAME parquet) - if (MSVC) - set(PARQUET_STATIC_LIB
"${PARQUET_LIBS}/${PARQUET_LIB_NAME}${PARQUET_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(PARQUET_SHARED_LIB "${PARQUET_SHARED_LIBS}/${PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") - set(PARQUET_SHARED_IMP_LIB "${PARQUET_LIBS}/${PARQUET_LIB_NAME}.lib") - else() - set(PARQUET_STATIC_LIB ${PARQUET_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${PARQUET_LIB_NAME}.a) - set(PARQUET_SHARED_LIB ${PARQUET_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${PARQUET_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) - endif() -else () - set(PARQUET_FOUND FALSE) -endif () +mark_as_advanced(PARQUET_ABI_VERSION + PARQUET_IMPORT_LIB + PARQUET_INCLUDE_DIR + PARQUET_LIBS + PARQUET_LIB_DIR + PARQUET_SHARED_IMP_LIB + PARQUET_SHARED_LIB + PARQUET_SO_VERSION + PARQUET_STATIC_LIB + PARQUET_VERSION) -if (PARQUET_FOUND) - if (NOT Parquet_FIND_QUIETLY) - message(STATUS "Found the Parquet library: ${PARQUET_LIBRARIES}") - endif () -else () - if (NOT Parquet_FIND_QUIETLY) - if (NOT PARQUET_FOUND) - set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} Could not find the parquet library.") - endif() +find_package_handle_standard_args(Parquet + REQUIRED_VARS + PARQUET_INCLUDE_DIR + PARQUET_LIB_DIR + PARQUET_SO_VERSION + VERSION_VAR + PARQUET_VERSION) +set(PARQUET_FOUND ${Parquet_FOUND}) - set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} Looked in ") - if ( _parquet_roots ) - set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} in ${_parquet_roots}.") - else () - set(PARQUET_ERR_MSG "${PARQUET_ERR_MSG} system search paths.") - endif () - if (Parquet_FIND_REQUIRED) - message(FATAL_ERROR "${PARQUET_ERR_MSG}") - else (Parquet_FIND_REQUIRED) - message(STATUS "${PARQUET_ERR_MSG}") - endif (Parquet_FIND_REQUIRED) - endif () -endif () - -mark_as_advanced( - PARQUET_FOUND - PARQUET_INCLUDE_DIR - PARQUET_LIBS - PARQUET_LIBRARIES - PARQUET_STATIC_LIB - PARQUET_SHARED_LIB -) +if(Parquet_FOUND AND NOT Parquet_FIND_QUIETLY) + message(STATUS "Parquet version: ${PARQUET_VERSION} (${PARQUET_FIND_APPROACH})") + message(STATUS "Found the Parquet shared
library: ${PARQUET_SHARED_LIB}") + message(STATUS "Found the Parquet import library: ${PARQUET_IMPORT_LIB}") + message(STATUS "Found the Parquet static library: ${PARQUET_STATIC_LIB}") +endif() diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index 3cb6be38c3f..d9b1cdcf70f 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -27,18 +27,91 @@ if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/CMakeLists.txt") set(MISSING_INTERNAL_PARQUET_LIBRARY 1) endif() +if (NOT SNAPPY_LIBRARY) + include(cmake/find/snappy.cmake) +endif() + if(NOT USE_INTERNAL_PARQUET_LIBRARY) find_package(Arrow) find_package(Parquet) + find_library(THRIFT_LIBRARY thrift) + find_library(UTF8_PROC_LIBRARY utf8proc) + find_package(BZip2) - if(NOT ARROW_INCLUDE_DIR OR PARQUET_INCLUDE_DIR) - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system parquet: arrow=${ARROW_INCLUDE_DIR} parquet=${PARQUET_INCLUDE_DIR}" ) + if(USE_STATIC_LIBRARIES) + find_library(ARROW_DEPS_LIBRARY arrow_bundled_dependencies) + + if (ARROW_DEPS_LIBRARY) + set(ARROW_IMPORT_OBJ_DIR "${CMAKE_CURRENT_BINARY_DIR}/contrib/arrow-cmake/imported-objects") + set(ARROW_OTHER_OBJS + "${ARROW_IMPORT_OBJ_DIR}/jemalloc.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/arena.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/background_thread.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/base.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/bin.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/bitmap.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/ckh.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/ctl.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/div.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/extent.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/extent_dss.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/extent_mmap.pic.o" + # skip hash + "${ARROW_IMPORT_OBJ_DIR}/hook.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/large.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/log.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/malloc_io.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/mutex.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/mutex_pool.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/nstime.pic.o" +
"${ARROW_IMPORT_OBJ_DIR}/pages.pic.o" + # skip prng + "${ARROW_IMPORT_OBJ_DIR}/prof.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/rtree.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/stats.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/sc.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/sz.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/tcache.pic.o" + # skip ticker + "${ARROW_IMPORT_OBJ_DIR}/tsd.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/test_hooks.pic.o" + "${ARROW_IMPORT_OBJ_DIR}/witness.pic.o" + ) + file(MAKE_DIRECTORY "${ARROW_IMPORT_OBJ_DIR}") + add_custom_command(OUTPUT ${ARROW_OTHER_OBJS} + COMMAND "${CMAKE_AR}" x "${ARROW_DEPS_LIBRARY}" + WORKING_DIRECTORY "${ARROW_IMPORT_OBJ_DIR}" + DEPENDS "${ARROW_DEPS_LIBRARY}" + VERBATIM) + set_source_files_properties(${ARROW_OTHER_OBJS} PROPERTIES EXTERNAL_OBJECT true GENERATED true) + add_library(imported_arrow_deps STATIC ${ARROW_OTHER_OBJS}) + + set(ARROW_LIBRARY ${ARROW_STATIC_LIB} + imported_arrow_deps ${THRIFT_LIBRARY} ${UTF8_PROC_LIBRARY} ${BZIP2_LIBRARIES} ${SNAPPY_LIBRARY}) + else() + message(WARNING "Using external static Arrow does not always work. " + "Could not find arrow_bundled_dependencies.a.
If compilation fails, " + "Try: -D\"USE_INTERNAL_PARQUET_LIBRARY\"=ON or -D\"ENABLE_PARQUET\"=OFF or " + "-D\"USE_STATIC_LIBRARIES\"=OFF") + set(ARROW_LIBRARY ${ARROW_STATIC_LIB}) + endif() + set(PARQUET_LIBRARY ${PARQUET_STATIC_LIB}) + else() + set(ARROW_LIBRARY ${ARROW_SHARED_LIB}) + set(PARQUET_LIBRARY ${PARQUET_SHARED_LIB}) + endif() + + if(ARROW_INCLUDE_DIR AND ARROW_LIBRARY AND PARQUET_INCLUDE_DIR AND PARQUET_LIBRARY AND THRIFT_LIBRARY AND UTF8_PROC_LIBRARY AND BZIP2_FOUND) + set(USE_PARQUET 1) + set(EXTERNAL_PARQUET_FOUND 1) + else() + message (${RECONFIGURE_MESSAGE_LEVEL} + "Can't find system parquet: arrow=${ARROW_INCLUDE_DIR} parquet=${PARQUET_INCLUDE_DIR}" ) + set(EXTERNAL_PARQUET_FOUND 0) endif() endif() -if(ARROW_INCLUDE_DIR AND PARQUET_INCLUDE_DIR) -elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD) - include(cmake/find/snappy.cmake) +if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD) if(SNAPPY_LIBRARY) set(CAN_USE_INTERNAL_PARQUET_LIBRARY 1) else() @@ -95,7 +168,10 @@ elseif(OS_FREEBSD) endif() if(USE_PARQUET) - message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY} ; ${FLATBUFFERS_LIBRARY}") + message(STATUS "Using Parquet: arrow=${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ;" + " parquet=${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ;" + " thrift=${THRIFT_LIBRARY} ;" + " flatbuffers=${FLATBUFFERS_LIBRARY}") else() message(STATUS "Building without Parquet support") endif() diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index dc1040f6a41..013dc513494 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -101,6 +101,8 @@ RUN apt update && \ libsnappy-dev \ libparquet-dev \ libthrift-dev \ + libutf8proc-dev \ + libbz2-dev \ libavro-dev \ libfarmhash-dev \ libmysqlclient-dev \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 509d7a2150b..843dd8c2615 100644 ---
a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -331,6 +331,9 @@ if (USE_PARQUET) dbms_target_link_libraries(PRIVATE ${PARQUET_LIBRARY}) if (NOT USE_INTERNAL_PARQUET_LIBRARY OR USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR}) + if (USE_STATIC_LIBRARIES) + dbms_target_link_libraries(PRIVATE ${ARROW_LIBRARY}) + endif() endif () endif ()