Merge pull request #4132 from danlark1/master

Add powerpc build support
This commit is contained in:
alexey-milovidov 2019-01-23 20:18:34 +03:00 committed by GitHub
commit e1c78a3b13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 164 additions and 66 deletions

View File

@ -25,9 +25,10 @@ Various possible options. We are not going to automate testing all of them.
#### CPU architectures:
- x86_64;
- AArch64.
- AArch64;
- PowerPC64LE.
x86_64 is the main CPU architecture. We also have minimal support for AArch64.
x86_64 is the main CPU architecture. We also have minimal support for AArch64 and PowerPC64LE.
#### Operating systems:
- Linux;

View File

@ -24,3 +24,10 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set (COMPILER_CLANG 1)
endif ()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
set (ARCH_PPC64LE 1)
if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))
message(FATAL_ERROR "Only gcc-8 is supported for powerpc architecture")
endif ()
endif ()

View File

@ -27,6 +27,9 @@ if (HAVE_SSE41)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()
if (ARCH_PPC64LE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS")
endif ()
# gcc -dM -E -msse4.2 - < /dev/null | sort > gcc-dump-sse42
#define __SSE4_2__ 1

View File

@ -341,6 +341,13 @@ static inline __m128i libdivide_get_0000FFFF(void) {
#pragma clang diagnostic pop
#endif
/// This is a bug in gcc-8, _MM_SHUFFLE was forgotten, though in trunk it is ok https://github.com/gcc-mirror/gcc/blob/master/gcc/config/rs6000/xmmintrin.h#L61
#if defined(__PPC__)
#ifndef _MM_SHUFFLE
#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
#endif
#endif
static inline __m128i libdivide_s64_signbits(__m128i v) {
//we want to compute v >> 63, that is, _mm_srai_epi64(v, 63). But there is no 64 bit shift right arithmetic instruction in SSE2. So we have to fake it by first duplicating the high 32 bit values, and then using a 32 bit shift. Another option would be to use _mm_srli_epi64(v, 63) and then subtract that from 0, but that approach appears to be substantially slower for unknown reasons
__m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1));

View File

@ -7,9 +7,9 @@ CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")
IF(ENABLE_SSE STREQUAL ON)
IF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_ARM)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
ENDIF(ENABLE_SSE STREQUAL ON)
ENDIF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_ARM)
IF(NOT TEST_HDFS_PREFIX)
SET(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." FORCE)

View File

@ -60,7 +60,9 @@
// WITH_SASL_SCRAM
//#define WITH_SASL_SCRAM 1
// crc32chw
#if !defined(__PPC__)
#define WITH_CRC32C_HW 1
#endif
// regex
#define HAVE_REGEX 1
// strndup

View File

@ -1,56 +1,110 @@
enable_language(ASM)
add_library(unwind
src/mi/init.c
src/mi/flush_cache.c
src/mi/mempool.c
src/mi/strerror.c
src/x86_64/is_fpreg.c
src/x86_64/regname.c
src/mi/_ReadULEB.c
src/mi/_ReadSLEB.c
src/mi/backtrace.c
src/mi/dyn-cancel.c
src/mi/dyn-info-list.c
src/mi/dyn-register.c
src/mi/Ldyn-extract.c
src/mi/Lfind_dynamic_proc_info.c
src/mi/Lget_accessors.c
src/mi/Lget_proc_info_by_ip.c
src/mi/Lget_proc_name.c
src/mi/Lput_dynamic_unwind_info.c
src/mi/Ldestroy_addr_space.c
src/mi/Lget_reg.c
src/mi/Lset_reg.c
src/mi/Lget_fpreg.c
src/mi/Lset_fpreg.c
src/mi/Lset_caching_policy.c
src/x86_64/setcontext.S
src/x86_64/Lcreate_addr_space.c
src/x86_64/Lget_save_loc.c
src/x86_64/Lglobal.c
src/x86_64/Linit.c
src/x86_64/Linit_local.c
src/x86_64/Linit_remote.c
src/x86_64/Lget_proc_info.c
src/x86_64/Lregs.c
src/x86_64/Lresume.c
src/x86_64/Lstash_frame.c
src/x86_64/Lstep.c
src/x86_64/Ltrace.c
src/x86_64/getcontext.S
src/dwarf/Lexpr.c
src/dwarf/Lfde.c
src/dwarf/Lfind_proc_info-lsb.c
src/dwarf/Lparser.c
src/dwarf/Lpe.c
src/dwarf/global.c
src/elf64.c
if (ARCH_PPC64LE)
add_library(unwind
src/mi/init.c
src/mi/flush_cache.c
src/mi/mempool.c
src/mi/strerror.c
src/mi/_ReadULEB.c
src/mi/_ReadSLEB.c
src/mi/backtrace.c
src/mi/dyn-cancel.c
src/mi/dyn-info-list.c
src/mi/dyn-register.c
src/mi/Ldyn-extract.c
src/mi/Lfind_dynamic_proc_info.c
src/mi/Lget_accessors.c
src/mi/Lget_proc_info_by_ip.c
src/mi/Lget_proc_name.c
src/mi/Lput_dynamic_unwind_info.c
src/mi/Ldestroy_addr_space.c
src/mi/Lget_reg.c
src/mi/Lset_reg.c
src/mi/Lget_fpreg.c
src/mi/Lset_fpreg.c
src/mi/Lset_caching_policy.c
src/dwarf/Lexpr.c
src/dwarf/Lfde.c
src/dwarf/Lfind_proc_info-lsb.c
src/dwarf/Lparser.c
src/dwarf/Lpe.c
src/dwarf/global.c
src/elf64.c
src/os-linux.c
src/os-linux.c
src/x86_64/Los-linux.c
)
src/ppc64/is_fpreg.c
src/ppc64/regname.c
src/ppc64/get_func_addr.c
src/ppc/Linit_local.c
src/ppc/Linit_remote.c
src/ppc/Lis_signal_frame.c
src/ppc/longjmp.S
src/ppc/Lreg_states_iterate.c
src/ppc/siglongjmp.S
src/ppc64/setcontext.S
src/ppc64/Lcreate_addr_space.c
src/ppc64/Lglobal.c
src/ppc64/Linit.c
src/ppc64/Lreg_states_iterate.c
src/ppc64/Lregs.c
src/ppc64/Lresume.c
src/ppc64/Lstep.c
src/ppc64/regname.c
src/ppc64/setcontext.S
)
else ()
add_library(unwind
src/mi/init.c
src/mi/flush_cache.c
src/mi/mempool.c
src/mi/strerror.c
src/mi/_ReadULEB.c
src/mi/_ReadSLEB.c
src/mi/backtrace.c
src/mi/dyn-cancel.c
src/mi/dyn-info-list.c
src/mi/dyn-register.c
src/mi/Ldyn-extract.c
src/mi/Lfind_dynamic_proc_info.c
src/mi/Lget_accessors.c
src/mi/Lget_proc_info_by_ip.c
src/mi/Lget_proc_name.c
src/mi/Lput_dynamic_unwind_info.c
src/mi/Ldestroy_addr_space.c
src/mi/Lget_reg.c
src/mi/Lset_reg.c
src/mi/Lget_fpreg.c
src/mi/Lset_fpreg.c
src/mi/Lset_caching_policy.c
src/dwarf/Lexpr.c
src/dwarf/Lfde.c
src/dwarf/Lfind_proc_info-lsb.c
src/dwarf/Lparser.c
src/dwarf/Lpe.c
src/dwarf/global.c
src/elf64.c
src/os-linux.c
src/x86_64/is_fpreg.c
src/x86_64/regname.c
src/x86_64/setcontext.S
src/x86_64/Lcreate_addr_space.c
src/x86_64/Lget_save_loc.c
src/x86_64/Lglobal.c
src/x86_64/Linit.c
src/x86_64/Linit_local.c
src/x86_64/Linit_remote.c
src/x86_64/Lget_proc_info.c
src/x86_64/Lregs.c
src/x86_64/Lresume.c
src/x86_64/Lstash_frame.c
src/x86_64/Lstep.c
src/x86_64/Ltrace.c
src/x86_64/getcontext.S
src/x86_64/Los-linux.c
)
endif()
find_file (HAVE_ATOMIC_OPS_H "atomic_ops.h")
configure_file (config/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config/config.h)

View File

@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include <stdlib.h>
#include <libunwind.h>
#include <libunwind-ppc64.h>
#include "elf64.h"
#include "mempool.h"

View File

@ -25,7 +25,7 @@ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "unwind_i.h"
#include "../ppc64/unwind_i.h"
PROTECTED int
unw_reg_states_iterate (unw_cursor_t *cursor,

View File

@ -70,7 +70,7 @@ inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset)
}
#ifdef __x86_64__
#if defined(__x86_64__) || defined(__PPC__)
/** We use 'xmm' (128bit SSE) registers here to shuffle 16 bytes.
*
@ -260,7 +260,7 @@ inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset)
}
#ifdef __x86_64__
#if defined(__x86_64__) || defined(__PPC__)
inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{

View File

@ -82,9 +82,9 @@
#endif
#define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64 (work in progress)"
#define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress)"
#if !defined(__x86_64__) && !defined(__aarch64__)
#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__)
// #error PLATFORM_NOT_SUPPORTED
#endif

View File

@ -129,7 +129,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars & data, ColumnSt
if (size)
{
#ifdef __SSE2__
#ifdef __x86_64__
/// An optimistic branch in which more efficient copying is possible.
if (offset + 16 * UNROLL_TIMES <= data.allocated_bytes() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end())
{

View File

@ -44,6 +44,12 @@ inline size_t readAlpha(char * res, size_t max_chars, ReadBuffer & in)
return num_chars;
}
#if defined(__PPC__)
#if !__clang__
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
#endif
template <size_t digit, size_t power_of_ten, typename T>
inline void readDecimalNumberImpl(T & res, const char * src)
{
@ -513,6 +519,11 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date
}
#if defined(__PPC__)
#if !__clang__
#pragma GCC diagnostic pop
#endif
#endif
void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone)
{

View File

@ -154,7 +154,8 @@ Normally we release and run all tests on a single variant of ClickHouse build. B
- build on FreeBSD;
- build on Debian with libraries from system packages;
- build with shared linking of libraries;
- build on AArch64 platform.
- build on AArch64 platform;
- build on PowerPc platform.
For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.

View File

@ -1,4 +1,4 @@
if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32)
if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE)
set(ENABLE_JEMALLOC_DEFAULT 1)
else ()
set(ENABLE_JEMALLOC_DEFAULT 0)

View File

@ -14,6 +14,11 @@
#define DATE_LUT_MAX_YEAR 2105 /// Last supported year
#define DATE_LUT_YEARS (1 + DATE_LUT_MAX_YEAR - DATE_LUT_MIN_YEAR) /// Number of years in lookup table
#if defined(__PPC__)
#if !__clang__
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
#endif
/** Lookup table to conversion of time to date, and to month / year / day of week / day of month and so on.
* First time was implemented for OLAPServer, that needed to do billions of such transformations.
@ -684,3 +689,9 @@ public:
return s;
}
};
#if defined(__PPC__)
#if !__clang__
#pragma GCC diagnostic pop
#endif
#endif