mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 00:22:29 +00:00
Merge pull request #48294 from ClickHouse/update-arrow-2
Try to update arrow library to release 11.0.0
This commit is contained in:
commit
fb3af065f4
81
base/glibc-compatibility/musl/expf.c
Normal file
81
base/glibc-compatibility/musl/expf.c
Normal file
@ -0,0 +1,81 @@
|
||||
/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */
|
||||
/*
|
||||
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||
*/
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
#include "libm.h"
|
||||
|
||||
/* Rounding offsets used by expf(), indexed by the sign bit of x. */
static const float half[2] = {0.5f, -0.5f};

/* ln(2) split into a high part and a low correction, and its reciprocal. */
static const float ln2hi  = 6.9314575195e-1f;  /* 0x3f317200 */
static const float ln2lo  = 1.4286067653e-6f;  /* 0x35bfbe8e */
static const float invln2 = 1.4426950216e+0f;  /* 0x3fb8aa3b */

/*
 * Polynomial coefficients.
 * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]:
 * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74
 */
static const float P1 =  1.6666625440e-1f;  /*  0xaaaa8f.0p-26 */
static const float P2 = -2.7667332906e-3f;  /* -0xb55215.0p-32 */
|
||||
|
||||
float expf(float x)
|
||||
{
|
||||
float_t hi, lo, c, xx, y;
|
||||
int k, sign;
|
||||
uint32_t hx;
|
||||
|
||||
GET_FLOAT_WORD(hx, x);
|
||||
sign = hx >> 31; /* sign bit of x */
|
||||
hx &= 0x7fffffff; /* high word of |x| */
|
||||
|
||||
/* special cases */
|
||||
if (hx >= 0x42aeac50) { /* if |x| >= -87.33655f or NaN */
|
||||
if (hx >= 0x42b17218 && !sign) { /* x >= 88.722839f */
|
||||
/* overflow */
|
||||
x *= 0x1p127f;
|
||||
return x;
|
||||
}
|
||||
if (sign) {
|
||||
/* underflow */
|
||||
FORCE_EVAL(-0x1p-149f/x);
|
||||
if (hx >= 0x42cff1b5) /* x <= -103.972084f */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* argument reduction */
|
||||
if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
|
||||
if (hx > 0x3f851592) /* if |x| > 1.5 ln2 */
|
||||
k = invln2*x + half[sign];
|
||||
else
|
||||
k = 1 - sign - sign;
|
||||
hi = x - k*ln2hi; /* k*ln2hi is exact here */
|
||||
lo = k*ln2lo;
|
||||
x = hi - lo;
|
||||
} else if (hx > 0x39000000) { /* |x| > 2**-14 */
|
||||
k = 0;
|
||||
hi = x;
|
||||
lo = 0;
|
||||
} else {
|
||||
/* raise inexact */
|
||||
FORCE_EVAL(0x1p127f + x);
|
||||
return 1 + x;
|
||||
}
|
||||
|
||||
/* x is now in primary range */
|
||||
xx = x*x;
|
||||
c = x - xx*(P1+xx*P2);
|
||||
y = 1 + (x*c/(2-c) - lo + hi);
|
||||
if (k == 0)
|
||||
return y;
|
||||
return scalbnf(y, k);
|
||||
}
|
31
base/glibc-compatibility/musl/scalbnf.c
Normal file
31
base/glibc-compatibility/musl/scalbnf.c
Normal file
@ -0,0 +1,31 @@
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * scalbnf(x, n): returns x * 2**n.
 *
 * The scaling is done with at most three multiplications by powers of two:
 * up to two fixed-size steps that bring n into the range of a normal float
 * exponent, then one multiplication by 2**n built directly from its exponent
 * bits. n is clamped once the result is already certain to overflow or
 * underflow.
 */
float scalbnf(float x, int n)
{
    union {float f; uint32_t i;} u;
    float_t y = x;

    if (n > 127) {
        y *= 0x1p127f;
        n -= 127;
        if (n > 127) {
            y *= 0x1p127f;
            n -= 127;
            if (n > 127)
                n = 127;
        }
    } else if (n < -126) {
        /*
         * Scale down by 2**-102 rather than 2**-126: the extra factor of
         * 2**24 leaves headroom so that an intermediate value is never
         * rounded in the subnormal range before the final multiplication
         * rounds it again (double rounding). With a plain 2**-126 step,
         * scalbnf(0x1.000002p-23f, -127) returned 0 instead of 0x1p-149f.
         */
        y *= 0x1p-126f * 0x1p24f;
        n += 126 - 24;
        if (n < -126) {
            y *= 0x1p-126f * 0x1p24f;
            n += 126 - 24;
            if (n < -126)
                n = -126;
        }
    }
    /* Build 2**n from its biased exponent; n is within [-126, 127] here. */
    u.i = (uint32_t)(0x7f+n)<<23;
    x = y * u.f;
    return x;
}
|
2
contrib/arrow
vendored
2
contrib/arrow
vendored
@ -1 +1 @@
|
||||
Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666
|
||||
Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f
|
@ -202,6 +202,7 @@ set(ARROW_SRCS
|
||||
"${LIBRARY_DIR}/builder.cc"
|
||||
"${LIBRARY_DIR}/buffer.cc"
|
||||
"${LIBRARY_DIR}/chunked_array.cc"
|
||||
"${LIBRARY_DIR}/chunk_resolver.cc"
|
||||
"${LIBRARY_DIR}/compare.cc"
|
||||
"${LIBRARY_DIR}/config.cc"
|
||||
"${LIBRARY_DIR}/datum.cc"
|
||||
@ -268,6 +269,10 @@ set(ARROW_SRCS
|
||||
"${LIBRARY_DIR}/util/uri.cc"
|
||||
"${LIBRARY_DIR}/util/utf8.cc"
|
||||
"${LIBRARY_DIR}/util/value_parsing.cc"
|
||||
"${LIBRARY_DIR}/util/byte_size.cc"
|
||||
"${LIBRARY_DIR}/util/debug.cc"
|
||||
"${LIBRARY_DIR}/util/tracing.cc"
|
||||
"${LIBRARY_DIR}/util/atfork_internal.cc"
|
||||
"${LIBRARY_DIR}/vendored/base64.cpp"
|
||||
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
|
||||
|
||||
@ -301,9 +306,11 @@ set(ARROW_SRCS
|
||||
"${LIBRARY_DIR}/compute/exec/source_node.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/sink_node.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/order_by_impl.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/partition_util.cc"
|
||||
"${LIBRARY_DIR}/compute/function.cc"
|
||||
"${LIBRARY_DIR}/compute/function_internal.cc"
|
||||
"${LIBRARY_DIR}/compute/kernel.cc"
|
||||
"${LIBRARY_DIR}/compute/light_array.cc"
|
||||
"${LIBRARY_DIR}/compute/registry.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
|
||||
@ -317,21 +324,28 @@ set(ARROW_SRCS
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_compare.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_nested.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_random.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_round.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_string.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_hash.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_select_k.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_nested.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_replace.cc"
|
||||
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc"
|
||||
@ -340,13 +354,15 @@ set(ARROW_SRCS
|
||||
"${LIBRARY_DIR}/compute/exec/union_node.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/key_hash.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/key_map.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/key_compare.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/key_encode.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/util.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/hash_join_dict.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/hash_join.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/hash_join_node.cc"
|
||||
"${LIBRARY_DIR}/compute/exec/task_util.cc"
|
||||
"${LIBRARY_DIR}/compute/row/encode_internal.cc"
|
||||
"${LIBRARY_DIR}/compute/row/grouper.cc"
|
||||
"${LIBRARY_DIR}/compute/row/compare_internal.cc"
|
||||
"${LIBRARY_DIR}/compute/row/row_internal.cc"
|
||||
|
||||
"${LIBRARY_DIR}/ipc/dictionary.cc"
|
||||
"${LIBRARY_DIR}/ipc/feather.cc"
|
||||
@ -357,7 +373,8 @@ set(ARROW_SRCS
|
||||
"${LIBRARY_DIR}/ipc/writer.cc"
|
||||
|
||||
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc"
|
||||
"${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc"
|
||||
"${ARROW_SRC_DIR}/arrow/adapters/orc/util.cc"
|
||||
"${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc"
|
||||
)
|
||||
|
||||
add_definitions(-DARROW_WITH_LZ4)
|
||||
|
@ -21,9 +21,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
/// For ORC format, index_nested_type = true, a nested type takes one index count. And the
|
||||
/// the start index for ORC format should be 1, since index 0 indicates to select all columns.
|
||||
template<bool index_nested_type>
|
||||
|
||||
class ArrowFieldIndexUtil
|
||||
{
|
||||
public:
|
||||
@ -46,9 +44,7 @@ public:
|
||||
calculateFieldIndices(const arrow::Schema & schema)
|
||||
{
|
||||
std::unordered_map<std::string, std::pair<int, int>> result;
|
||||
// For format like ORC, index = 0 indicates to select all columns, so we skip 0 and start
|
||||
// from 1.
|
||||
int index_start = index_nested_type;
|
||||
int index_start = 0;
|
||||
for (int i = 0; i < schema.num_fields(); ++i)
|
||||
{
|
||||
const auto & field = schema.field(i);
|
||||
@ -94,17 +90,16 @@ public:
|
||||
}
|
||||
|
||||
/// Count the number of indices for types.
|
||||
/// For orc format, index_nested_type is true, a complex type takes one index.
|
||||
size_t countIndicesForType(std::shared_ptr<arrow::DataType> type)
|
||||
{
|
||||
if (type->id() == arrow::Type::LIST)
|
||||
{
|
||||
return countIndicesForType(static_cast<arrow::ListType *>(type.get())->value_type()) + index_nested_type;
|
||||
return countIndicesForType(static_cast<arrow::ListType *>(type.get())->value_type());
|
||||
}
|
||||
|
||||
if (type->id() == arrow::Type::STRUCT)
|
||||
{
|
||||
int indices = index_nested_type;
|
||||
int indices = 0;
|
||||
auto * struct_type = static_cast<arrow::StructType *>(type.get());
|
||||
for (int i = 0; i != struct_type->num_fields(); ++i)
|
||||
indices += countIndicesForType(struct_type->field(i)->type());
|
||||
@ -114,7 +109,7 @@ public:
|
||||
if (type->id() == arrow::Type::MAP)
|
||||
{
|
||||
auto * map_type = static_cast<arrow::MapType *>(type.get());
|
||||
return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()) + index_nested_type;
|
||||
return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()) ;
|
||||
}
|
||||
|
||||
return 1;
|
||||
@ -144,8 +139,6 @@ private:
|
||||
index_info.first = current_start_index;
|
||||
if (field_type->id() == arrow::Type::STRUCT)
|
||||
{
|
||||
current_start_index += index_nested_type;
|
||||
|
||||
auto * struct_type = static_cast<arrow::StructType *>(field_type.get());
|
||||
for (int i = 0, n = struct_type->num_fields(); i < n; ++i)
|
||||
{
|
||||
@ -161,7 +154,6 @@ private:
|
||||
const auto * list_type = static_cast<arrow::ListType *>(field_type.get());
|
||||
const auto value_field = list_type->value_field();
|
||||
auto index_snapshot = current_start_index;
|
||||
current_start_index += index_nested_type;
|
||||
calculateFieldIndices(*value_field, field_name, current_start_index, result, name_prefix);
|
||||
// The nested struct field has the same name as this list field.
|
||||
// rewrite it back to the original value.
|
||||
|
@ -129,10 +129,17 @@ void ORCBlockInputFormat::prepareReader()
|
||||
format_settings.null_as_default,
|
||||
format_settings.orc.case_insensitive_column_matching);
|
||||
|
||||
ArrowFieldIndexUtil<true> field_util(
|
||||
format_settings.orc.case_insensitive_column_matching,
|
||||
format_settings.orc.allow_missing_columns);
|
||||
include_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema);
|
||||
const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
|
||||
std::unordered_set<String> nested_table_names;
|
||||
if (format_settings.orc.import_nested)
|
||||
nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);
|
||||
|
||||
for (int i = 0; i < schema->num_fields(); ++i)
|
||||
{
|
||||
const auto & name = schema->field(i)->name();
|
||||
if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
|
||||
include_indices.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
|
||||
|
@ -132,7 +132,7 @@ void ParquetBlockInputFormat::prepareReader()
|
||||
format_settings.null_as_default,
|
||||
format_settings.parquet.case_insensitive_column_matching);
|
||||
|
||||
ArrowFieldIndexUtil<false> field_util(
|
||||
ArrowFieldIndexUtil field_util(
|
||||
format_settings.parquet.case_insensitive_column_matching,
|
||||
format_settings.parquet.allow_missing_columns);
|
||||
column_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema);
|
||||
|
@ -95,14 +95,14 @@ void ParquetBlockOutputFormat::consume(Chunk chunk)
|
||||
builder.version(getParquetVersion(format_settings));
|
||||
builder.compression(getParquetCompression(format_settings.parquet.output_compression_method));
|
||||
auto props = builder.build();
|
||||
auto status = parquet::arrow::FileWriter::Open(
|
||||
auto result = parquet::arrow::FileWriter::Open(
|
||||
*arrow_table->schema(),
|
||||
arrow::default_memory_pool(),
|
||||
sink,
|
||||
props, /*parquet::default_writer_properties(),*/
|
||||
&file_writer);
|
||||
if (!status.ok())
|
||||
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", status.ToString());
|
||||
props);
|
||||
if (!result.ok())
|
||||
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", result.status().ToString());
|
||||
file_writer = std::move(result.ValueOrDie());
|
||||
}
|
||||
|
||||
// TODO: calculate row_group_size depending on a number of rows and table size
|
||||
|
@ -92,8 +92,11 @@ idx10 ['This','is','a','test']
|
||||
123 1
|
||||
456 2
|
||||
=== Try load data from datapage_v2.snappy.parquet
|
||||
Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unknown encoding type.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA)
|
||||
|
||||
abc 1 2 1 [1,2,3]
|
||||
abc 2 3 1 []
|
||||
abc 3 4 1 []
|
||||
\N 4 5 0 [1,2,3]
|
||||
abc 5 2 1 [1,2]
|
||||
=== Try load data from datatype-date32.parquet
|
||||
1925-01-01
|
||||
1949-10-01
|
||||
|
@ -1 +1 @@
|
||||
`a` Nullable(String), `b` Array(Nullable(Int32)), `c` Nullable(Float64), `d` Nullable(UInt8), `e` Array(Nullable(Int32))
|
||||
`a` Nullable(String), `b` Nullable(Int32), `c` Nullable(Float64), `d` Nullable(UInt8), `e` Array(Nullable(Int32))
|
||||
|
Loading…
Reference in New Issue
Block a user