Merge pull request #4484 from yandex/fix-bad-parsing-error-message

Added a test
This commit is contained in:
alexey-milovidov 2019-02-26 15:27:38 +03:00 committed by GitHub
commit 25b36a44f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 228 additions and 103 deletions

View File

@ -37,7 +37,8 @@ ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str)
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
if (!pool_map->count(connection_str)) if (!pool_map->count(connection_str))
{ {
pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str] { pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str]
{
return std::make_shared<Poco::Data::SessionPool>("ODBC", validateODBCConnectionString(connection_str)); return std::make_shared<Poco::Data::SessionPool>("ODBC", validateODBCConnectionString(connection_str));
})); }));
} }

View File

@ -117,3 +117,9 @@
#define NO_SANITIZE_UNDEFINED #define NO_SANITIZE_UNDEFINED
#define NO_SANITIZE_ADDRESS #define NO_SANITIZE_ADDRESS
#endif #endif
#if defined __GNUC__ && !defined __clang__
#define OPTIMIZE(x) __attribute__((__optimize__(x)))
#else
#define OPTIMIZE(x)
#endif

View File

@ -1,3 +1,5 @@
#include <Core/Defines.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/Operators.h> #include <IO/Operators.h>
@ -189,7 +191,13 @@ String CSVRowInputStream::getDiagnosticInfo()
} }
bool CSVRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, /** gcc-7 generates wrong code with optimization level greater than 1.
* See tests: dbms/src/IO/tests/write_int.cpp
* and dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh
* This is compiler bug. The bug does not present in gcc-8 and clang-8.
* Nevertheless, we don't need high optimization of this function.
*/
bool OPTIMIZE(1) CSVRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name) WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
{ {
const char delimiter = format_settings.csv.delimiter; const char delimiter = format_settings.csv.delimiter;

View File

@ -1,3 +1,5 @@
#include <Core/Defines.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <IO/Operators.h> #include <IO/Operators.h>
@ -162,8 +164,14 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
} }
bool TabSeparatedRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, /** gcc-7 generates wrong code with optimization level greater than 1.
WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name) * See tests: dbms/src/IO/tests/write_int.cpp
* and dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh
* This is compiler bug. The bug does not present in gcc-8 and clang-8.
* Nevertheless, we don't need high optimization of this function.
*/
bool OPTIMIZE(1) TabSeparatedRowInputStream::parseRowAndPrintDiagnosticInfo(
MutableColumns & columns, WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
{ {
size_t size = data_types.size(); size_t size = data_types.size();
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)

View File

@ -51,7 +51,7 @@ add_executable (hashing_read_buffer hashing_read_buffer.cpp)
target_link_libraries (hashing_read_buffer PRIVATE clickhouse_common_io) target_link_libraries (hashing_read_buffer PRIVATE clickhouse_common_io)
add_check (hashing_read_buffer) add_check (hashing_read_buffer)
add_executable (io_operators operators.cpp) add_executable (io_operators io_operators.cpp)
target_link_libraries (io_operators PRIVATE clickhouse_common_io) target_link_libraries (io_operators PRIVATE clickhouse_common_io)
if (OS_LINUX) if (OS_LINUX)

View File

@ -31,5 +31,11 @@ int main(int, char **)
std::cerr << hello << '\n'; std::cerr << hello << '\n';
} }
{
DB::WriteBufferFromFileDescriptor buf(STDOUT_FILENO);
size_t x = 11;
buf << "Column " << x << ", \n";
}
return 0; return 0;
} }

View File

@ -0,0 +1,28 @@
#include <IO/WriteHelpers.h>
#include <IO/WriteBufferFromFileDescriptor.h>
/** gcc-7 generates wrong code with -O1 -finline-small-functions -ftree-vrp
* This is compiler bug. The issue does not exist in gcc-8 or clang-8.
*/
using namespace DB;
void NO_INLINE write(WriteBuffer & out, size_t size)
{
for (size_t i = 0; i < size; ++i)
{
writeIntText(i, out);
writeChar(' ', out);
}
}
int main(int, char **)
{
WriteBufferFromFileDescriptor out(STDOUT_FILENO);
write(out, 80);
return 0;
}

View File

@ -0,0 +1 @@
Column 11, name: c11, type: UInt8, ERROR: text "a" is not like UInt8

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
# set -x
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CUR_DIR/../shell_config.sh
echo -ne '0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\ta' | $CLICKHOUSE_LOCAL --structure 'c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8, c10 UInt8, c11 UInt8' --input-format TSV --query 'SELECT * FROM table' 2>&1 | grep -F 'Column 11'

View File

@ -31,29 +31,11 @@
#include "likely.h" #include "likely.h"
#include "unaligned.h" #include "unaligned.h"
using int128_t = __int128;
using uint128_t = unsigned __int128; using uint128_t = unsigned __int128;
namespace impl namespace impl
{ {
// Using a lookup table to convert binary numbers from 0 to 99
// into ascii characters as described by Andrei Alexandrescu in
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
static const char digits[201] = "00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
static inline uint16_t const & dd(uint8_t u)
{
return reinterpret_cast<uint16_t const *>(digits)[u];
}
template <typename T> template <typename T>
static constexpr T pow10(size_t x) static constexpr T pow10(size_t x)
@ -77,64 +59,111 @@ static constexpr T pow10(size_t x)
// and compensating for rounding errors. The algorithm described below // and compensating for rounding errors. The algorithm described below
// was invented by Terje Mathisen, Norway, and not published elsewhere." // was invented by Terje Mathisen, Norway, and not published elsewhere."
template <typename UInt, bool A, UInt M, unsigned S> /// Division by constant is performed by:
struct MulInv /// 1. Adding 1 if needed;
/// 2. Multiplying by another constant;
/// 3. Shifting right by another constant.
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
struct Division
{ {
using type = UInt; static constexpr bool add{add_};
static constexpr bool a{A}; static constexpr UInt multiplier{multiplier_};
static constexpr UInt m{M}; static constexpr unsigned shift{shift_};
static constexpr unsigned s{S};
}; };
template <int, int, class...>
struct UT;
template <int N, class T, class... Ts>
struct UT<N, N, T, Ts...>
{
using U = T;
};
template <int N, int M, class T, class... Ts>
struct UT<N, M, T, Ts...>
{
using U = typename UT<N, 2 * M, Ts...>::U;
};
template <int N>
using MI = typename UT<
N,
1,
MulInv<uint8_t, 0, 205U, 11>,
MulInv<uint16_t, 1, 41943U, 22>,
MulInv<uint32_t, 0, 3518437209U, 45>,
MulInv<uint64_t, 0, 12379400392853802749U, 90>,
MulInv<uint128_t, 0, 0, 0>>::U;
template <int N>
using U = typename MI<N>::type;
// struct QR holds the result of dividing an unsigned N-byte variable /// Select a type with appropriate number of bytes from the list of types.
// by 10^N resulting in /// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
template <size_t N> /// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
struct QR template <size_t N, typename T, typename... Ts>
struct SelectType
{ {
U<N> q; // quotient with fewer than 2*N decimal digits using Result = typename SelectType<N / 2, Ts...>::Result;
U<N / 2> r; // remainder with at most N decimal digits
}; };
template <size_t N>
QR<N> static inline split(U<N> u) template <typename T, typename... Ts>
struct SelectType<1, T, Ts...>
{ {
constexpr MI<N> mi{}; using Result = T;
U<N> q = (mi.m * (U<2 * N>(u) + mi.a)) >> mi.s; };
return {q, U<N / 2>(u - q * pow10<U<N / 2>>(N))};
/// Division by 10^N where N is the size of the type.
template <size_t N>
using DivisionBy10PowN = typename SelectType
<
N,
Division<uint8_t, 0, 205U, 11>, /// divide by 10
Division<uint16_t, 1, 41943U, 22>, /// divide by 100
Division<uint32_t, 0, 3518437209U, 45>, /// divide by 10000
Division<uint64_t, 0, 12379400392853802749ULL, 90> /// divide by 100000000
>::Result;
template <size_t N>
using UnsignedOfSize = typename SelectType
<
N,
uint8_t,
uint16_t,
uint32_t,
uint64_t,
uint128_t
>::Result;
/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
template <size_t N>
struct QuotientAndRemainder
{
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
};
template <size_t N>
QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value)
{
constexpr DivisionBy10PowN<N> division;
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
UnsignedOfSize<N / 2> remainder = value - quotient * pow10<UnsignedOfSize<N / 2>>(N);
return {quotient, remainder};
} }
template <typename T>
static inline char * out(char * p, T && obj) static inline char * outDigit(char * p, uint8_t value)
{ {
memcpy(p, reinterpret_cast<const void *>(&obj), sizeof(T)); *p = '0' + value;
p += sizeof(T); ++p;
return p; return p;
} }
struct convert // Using a lookup table to convert binary numbers from 0 to 99
// into ascii characters as described by Andrei Alexandrescu in
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
static const char digits[201] = "00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
static inline char * outTwoDigits(char * p, uint8_t value)
{ {
memcpy(p, &digits[value * 2], 2);
p += 2;
return p;
}
namespace convert
{
template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u);
template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u);
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
// head: find most significant digit, skip leading zeros // head: find most significant digit, skip leading zeros
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
@ -142,35 +171,51 @@ struct convert
// "x" contains quotient and remainder after division by 10^N // "x" contains quotient and remainder after division by 10^N
// quotient is less than 10^N // quotient is less than 10^N
template <size_t N> template <size_t N>
static inline char * head(char * p, QR<N> x) static inline char * head(char * p, QuotientAndRemainder<N> x)
{ {
return tail(head(p, U<N / 2>(x.q)), x.r); p = head(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
} }
// "u" is less than 10^2*N // "u" is less than 10^2*N
template <typename UInt, size_t N = sizeof(UInt)> template <typename UInt, size_t N>
static inline char * head(char * p, UInt u) static inline char * head(char * p, UInt u)
{ {
return (u < pow10<U<N>>(N) ? (head(p, U<N / 2>(u))) : (head<N>(p, split<N>(u)))); return u < pow10<UnsignedOfSize<N>>(N)
? head(p, UnsignedOfSize<N / 2>(u))
: head<N>(p, split<N>(u));
} }
// recursion base case, selected when "u" is one byte // recursion base case, selected when "u" is one byte
static inline char * head(char * p, U<1> u) { return (u < 10 ? (out<char>(p, '0' + u)) : (out(p, dd(u)))); } template <>
inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
return u < 10
? outDigit(p, u)
: outTwoDigits(p, u);
}
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
// tail: produce all digits including leading zeros // tail: produce all digits including leading zeros
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
// recursive step, "u" is less than 10^2*N // recursive step, "u" is less than 10^2*N
template <typename UInt, size_t N = sizeof(UInt)> template <typename UInt, size_t N>
static inline char * tail(char * p, UInt u) static inline char * tail(char * p, UInt u)
{ {
QR<N> x = split<N>(u); QuotientAndRemainder<N> x = split<N>(u);
return tail(tail(p, U<N / 2>(x.q)), x.r); p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
} }
// recursion base case, selected when "u" is one byte // recursion base case, selected when "u" is one byte
static inline char * tail(char * p, U<1> u) { return out(p, dd(u)); } template <>
inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
return outTwoDigits(p, u);
}
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
// large values are >= 10^2*N // large values are >= 10^2*N
@ -178,10 +223,13 @@ struct convert
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
template <size_t N> template <size_t N>
static inline char * large(char * p, QR<N> x) static inline char * large(char * p, QuotientAndRemainder<N> x)
{ {
QR<N> y = split<N>(x.q); QuotientAndRemainder<N> y = split<N>(x.quotient);
return tail(tail(head(p, U<N / 2>(y.q)), y.r), x.r); p = head(p, UnsignedOfSize<N / 2>(y.quotient));
p = tail(p, y.remainder);
p = tail(p, x.remainder);
return p;
} }
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
@ -192,20 +240,29 @@ struct convert
template <typename UInt, size_t N = sizeof(UInt)> template <typename UInt, size_t N = sizeof(UInt)>
static inline char * itoa(char * p, UInt u) static inline char * itoa(char * p, UInt u)
{ {
if (u < pow10<U<N>>(N)) if (u < pow10<UnsignedOfSize<N>>(N))
return head(p, U<N / 2>(u)); return head(p, UnsignedOfSize<N / 2>(u));
QR<N> x = split<N>(u); QuotientAndRemainder<N> x = split<N>(u);
return (u < pow10<U<N>>(2 * N) ? (head<N>(p, x)) : (large<N>(p, x)));
return u < pow10<UnsignedOfSize<N>>(2 * N)
? head<N>(p, x)
: large<N>(p, x);
} }
// selected when "u" is one byte // selected when "u" is one byte
static inline char * itoa(char * p, U<1> u) template <>
inline char * itoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{ {
if (u < 10) if (u < 10)
return out<char>(p, '0' + u); return outDigit(p, u);
if (u < 100) else if (u < 100)
return out(p, dd(u)); return outTwoDigits(p, u);
return out(out<char>(p, '0' + u / 100), dd(u % 100)); else
{
p = outDigit(p, u / 100);
p = outTwoDigits(p, u % 100);
return p;
}
} }
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
@ -213,14 +270,14 @@ struct convert
//===----------------------------------------------------------===// //===----------------------------------------------------------===//
// itoa: handle unsigned integral operands (selected by SFINAE) // itoa: handle unsigned integral operands (selected by SFINAE)
template <typename U, std::enable_if_t<not std::is_signed<U>::value && std::is_integral<U>::value> * = nullptr> template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
static inline char * itoa(U u, char * p) static inline char * itoa(U u, char * p)
{ {
return convert::itoa(p, u); return convert::itoa(p, u);
} }
// itoa: handle signed integral operands (selected by SFINAE) // itoa: handle signed integral operands (selected by SFINAE)
template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed<I>::value && std::is_integral<I>::value> * = nullptr> template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
static inline char * itoa(I i, char * p) static inline char * itoa(I i, char * p)
{ {
// Need "mask" to be filled with a copy of the sign bit. // Need "mask" to be filled with a copy of the sign bit.
@ -230,19 +287,19 @@ struct convert
// Use a conditional expression to be portable, // Use a conditional expression to be portable,
// a good optimizing compiler generates an arithmetic right shift // a good optimizing compiler generates an arithmetic right shift
// and avoids the conditional branch. // and avoids the conditional branch.
U<N> mask = i < 0 ? ~U<N>(0) : 0; UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
// Now get the absolute value of "i" and cast to unsigned type U<N>. // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
// Cannot use std::abs() because the result is undefined // Cannot use std::abs() because the result is undefined
// in 2's complement systems for the most-negative value. // in 2's complement systems for the most-negative value.
// Want to avoid conditional branch for performance reasons since // Want to avoid conditional branch for performance reasons since
// CPU branch prediction will be ineffective when negative values // CPU branch prediction will be ineffective when negative values
// occur randomly. // occur randomly.
// Let "u" be "i" cast to unsigned type U<N>. // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
// This yields the absolute value with the desired type without // This yields the absolute value with the desired type without
// using a conditional branch and without invoking undefined or // using a conditional branch and without invoking undefined or
// implementation defined behavior: // implementation defined behavior:
U<N> u = ((2 * U<N>(i)) & ~mask) - U<N>(i); UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
// Unconditionally store a minus sign when producing digits // Unconditionally store a minus sign when producing digits
// in a forward direction and increment the pointer only if // in a forward direction and increment the pointer only if
// the value is in fact negative. // the value is in fact negative.
@ -298,12 +355,12 @@ static inline char * writeUIntText(uint128_t x, char * p)
const auto i = x % 100; const auto i = x % 100;
x /= 100; x /= 100;
pp -= 2; pp -= 2;
unalignedStore(pp, dd(i)); outTwoDigits(pp, i);
} }
if (x < 10) if (x < 10)
*p = '0' + x; *p = '0' + x;
else else
unalignedStore(p, dd(x)); outTwoDigits(p, x);
return p + len; return p + len;
} }
@ -313,9 +370,9 @@ static inline char * writeLeadingMinus(char * pos)
return pos + 1; return pos + 1;
} }
static inline char * writeSIntText(__int128 x, char * pos) static inline char * writeSIntText(int128_t x, char * pos)
{ {
static const __int128 min_int128 = __int128(0x8000000000000000ll) << 64; static const int128_t min_int128 = int128_t(0x8000000000000000ll) << 64;
if (unlikely(x == min_int128)) if (unlikely(x == min_int128))
{ {
@ -328,7 +385,7 @@ static inline char * writeSIntText(__int128 x, char * pos)
x = -x; x = -x;
pos = writeLeadingMinus(pos); pos = writeLeadingMinus(pos);
} }
return writeUIntText(static_cast<unsigned __int128>(x), pos); return writeUIntText(static_cast<uint128_t>(x), pos);
} }
} }
@ -339,12 +396,14 @@ char * itoa(I i, char * p)
return impl::convert::itoa(i, p); return impl::convert::itoa(i, p);
} }
static inline char * itoa(uint128_t i, char * p) template <>
inline char * itoa<uint128_t>(uint128_t i, char * p)
{ {
return impl::writeUIntText(i, p); return impl::writeUIntText(i, p);
} }
static inline char * itoa(__int128 i, char * p) template <>
inline char * itoa<int128_t>(int128_t i, char * p)
{ {
return impl::writeSIntText(i, p); return impl::writeSIntText(i, p);
} }