Merge pull request #4484 from yandex/fix-bad-parsing-error-message

Added a test
2024-11-17 21:24:28 +00:00 · 2019-02-26 15:27:38 +03:00 · 2019-02-26 15:27:38 +03:00 · 25b36a44f3
commit 25b36a44f3
parent 3105d83861 8e59998dfa
10 changed files with 228 additions and 103 deletions
--- a/dbms/programs/odbc-bridge/MainHandler.cpp
+++ b/dbms/programs/odbc-bridge/MainHandler.cpp
@ -37,7 +37,8 @@ ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str)
    std::lock_guard lock(mutex);
    if (!pool_map->count(connection_str))
    {
-        pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str] {
+        pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str]
        {
            return std::make_shared<Poco::Data::SessionPool>("ODBC", validateODBCConnectionString(connection_str));
        }));
    }
--- a/dbms/src/Core/Defines.h
+++ b/dbms/src/Core/Defines.h
@ -117,3 +117,9 @@
    #define NO_SANITIZE_UNDEFINED
    #define NO_SANITIZE_ADDRESS
 #endif
 #if defined __GNUC__ && !defined __clang__
    #define OPTIMIZE(x) __attribute__((__optimize__(x)))
 #else
    #define OPTIMIZE(x)
 #endif
--- a/dbms/src/Formats/CSVRowInputStream.cpp
+++ b/dbms/src/Formats/CSVRowInputStream.cpp
@ -1,3 +1,5 @@
 #include <Core/Defines.h>
 #include <IO/ReadHelpers.h>
 #include <IO/Operators.h>
@ -189,7 +191,13 @@ String CSVRowInputStream::getDiagnosticInfo()
 }
-bool CSVRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
+/** gcc-7 generates wrong code with optimization level greater than 1.
  * See tests: dbms/src/IO/tests/write_int.cpp
  *  and dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh
  * This is compiler bug. The bug does not present in gcc-8 and clang-8.
  * Nevertheless, we don't need high optimization of this function.
  */
 bool OPTIMIZE(1) CSVRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
    WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
 {
    const char delimiter = format_settings.csv.delimiter;
--- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp
+++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp
@ -1,3 +1,5 @@
 #include <Core/Defines.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteBufferFromString.h>
 #include <IO/Operators.h>
@ -162,8 +164,14 @@ String TabSeparatedRowInputStream::getDiagnosticInfo()
 }
-bool TabSeparatedRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumns & columns,
+/** gcc-7 generates wrong code with optimization level greater than 1.
-    WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
+  * See tests: dbms/src/IO/tests/write_int.cpp
  *  and dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh
  * This is compiler bug. The bug does not present in gcc-8 and clang-8.
  * Nevertheless, we don't need high optimization of this function.
  */
 bool OPTIMIZE(1) TabSeparatedRowInputStream::parseRowAndPrintDiagnosticInfo(
    MutableColumns & columns, WriteBuffer & out, size_t max_length_of_column_name, size_t max_length_of_data_type_name)
 {
    size_t size = data_types.size();
    for (size_t i = 0; i < size; ++i)
--- a/dbms/src/IO/tests/CMakeLists.txt
+++ b/dbms/src/IO/tests/CMakeLists.txt
@ -51,7 +51,7 @@ add_executable (hashing_read_buffer hashing_read_buffer.cpp)
 target_link_libraries (hashing_read_buffer PRIVATE clickhouse_common_io)
 add_check (hashing_read_buffer)
-add_executable (io_operators operators.cpp)
+add_executable (io_operators io_operators.cpp)
 target_link_libraries (io_operators PRIVATE clickhouse_common_io)
 if (OS_LINUX)
--- a/dbms/src/IO/tests/io_operators.cpp
+++ b/dbms/src/IO/tests/io_operators.cpp
@ -31,5 +31,11 @@ int main(int, char **)
        std::cerr << hello << '\n';
    }
    {
        DB::WriteBufferFromFileDescriptor buf(STDOUT_FILENO);
        size_t x = 11;
        buf << "Column " << x << ", \n";
    }
    return 0;
 }
--- a/dbms/src/IO/tests/write_int.cpp
+++ b/dbms/src/IO/tests/write_int.cpp
@ -0,0 +1,28 @@
 #include <IO/WriteHelpers.h>
 #include <IO/WriteBufferFromFileDescriptor.h>
 /** gcc-7 generates wrong code with -O1 -finline-small-functions -ftree-vrp
  * This is compiler bug. The issue does not exist in gcc-8 or clang-8.
  */
 using namespace DB;
 void NO_INLINE write(WriteBuffer & out, size_t size)
 {
    for (size_t i = 0; i < size; ++i)
    {
        writeIntText(i, out);
        writeChar(' ', out);
    }
 }
 int main(int, char **)
 {
    WriteBufferFromFileDescriptor out(STDOUT_FILENO);
    write(out, 80);
    return 0;
 }
--- a/dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.reference
+++ b/dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.reference
@ -0,0 +1 @@
 Column 11,  name: c11, type: UInt8, ERROR: text "a" is not like UInt8
--- a/dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh
+++ b/dbms/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh
@ -0,0 +1,8 @@
 #!/usr/bin/env bash
 # set -x
 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CUR_DIR/../shell_config.sh
 echo -ne '0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\ta' | $CLICKHOUSE_LOCAL --structure 'c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8, c10 UInt8, c11 UInt8' --input-format TSV --query 'SELECT * FROM table' 2>&1 | grep -F 'Column 11'
--- a/libs/libcommon/include/common/itoa.h
+++ b/libs/libcommon/include/common/itoa.h
@ -31,29 +31,11 @@
 #include "likely.h"
 #include "unaligned.h"
 using int128_t = __int128;
 using uint128_t = unsigned __int128;
 namespace impl
 {
 // Using a lookup table to convert binary numbers from 0 to 99
 // into ascii characters as described by Andrei Alexandrescu in
 // https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
 static const char digits[201] = "00010203040506070809"
                                "10111213141516171819"
                                "20212223242526272829"
                                "30313233343536373839"
                                "40414243444546474849"
                                "50515253545556575859"
                                "60616263646566676869"
                                "70717273747576777879"
                                "80818283848586878889"
                                "90919293949596979899";
 static inline uint16_t const & dd(uint8_t u)
 {
    return reinterpret_cast<uint16_t const *>(digits)[u];
 }
 template <typename T>
 static constexpr T pow10(size_t x)
@ -77,64 +59,111 @@ static constexpr T pow10(size_t x)
 // and compensating for rounding errors. The algorithm described below
 // was invented by Terje Mathisen, Norway, and not published elsewhere."
-template <typename UInt, bool A, UInt M, unsigned S>
+/// Division by constant is performed by:
-struct MulInv
+/// 1. Adding 1 if needed;
 /// 2. Multiplying by another constant;
 /// 3. Shifting right by another constant.
 template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
 struct Division
 {
-    using type = UInt;
+    static constexpr bool add{add_};
-    static constexpr bool a{A};
+    static constexpr UInt multiplier{multiplier_};
-    static constexpr UInt m{M};
+    static constexpr unsigned shift{shift_};
    static constexpr unsigned s{S};
 };
 template <int, int, class...>
 struct UT;
 template <int N, class T, class... Ts>
 struct UT<N, N, T, Ts...>
 {
    using U = T;
 };
 template <int N, int M, class T, class... Ts>
 struct UT<N, M, T, Ts...>
 {
    using U = typename UT<N, 2 * M, Ts...>::U;
 };
 template <int N>
 using MI = typename UT<
    N,
    1,
    MulInv<uint8_t, 0, 205U, 11>,
    MulInv<uint16_t, 1, 41943U, 22>,
    MulInv<uint32_t, 0, 3518437209U, 45>,
    MulInv<uint64_t, 0, 12379400392853802749U, 90>,
    MulInv<uint128_t, 0, 0, 0>>::U;
 template <int N>
 using U = typename MI<N>::type;
-// struct QR holds the result of dividing an unsigned N-byte variable
+/// Select a type with appropriate number of bytes from the list of types.
-// by 10^N resulting in
+/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
-template <size_t N>
+/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
-struct QR
+template <size_t N, typename T, typename... Ts>
 struct SelectType
 {
-    U<N> q; // quotient with fewer than 2*N decimal digits
+    using Result = typename SelectType<N / 2, Ts...>::Result;
    U<N / 2> r; // remainder with at most N decimal digits
 };
-template <size_t N>
+
-QR<N> static inline split(U<N> u)
+template <typename T, typename... Ts>
 struct SelectType<1, T, Ts...>
 {
-    constexpr MI<N> mi{};
+    using Result = T;
-    U<N> q = (mi.m * (U<2 * N>(u) + mi.a)) >> mi.s;
+};
-    return {q, U<N / 2>(u - q * pow10<U<N / 2>>(N))};
+
 /// Division by 10^N where N is the size of the type.
 template <size_t N>
 using DivisionBy10PowN = typename SelectType
 <
    N,
    Division<uint8_t, 0, 205U, 11>,                           /// divide by 10
    Division<uint16_t, 1, 41943U, 22>,                        /// divide by 100
    Division<uint32_t, 0, 3518437209U, 45>,                   /// divide by 10000
    Division<uint64_t, 0, 12379400392853802749ULL, 90>        /// divide by 100000000
 >::Result;
 template <size_t N>
 using UnsignedOfSize = typename SelectType
 <
    N,
    uint8_t,
    uint16_t,
    uint32_t,
    uint64_t,
    uint128_t
 >::Result;
 /// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
 template <size_t N>
 struct QuotientAndRemainder
 {
    UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
    UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
 };
 template <size_t N>
 QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value)
 {
    constexpr DivisionBy10PowN<N> division;
    UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
    UnsignedOfSize<N / 2> remainder = value - quotient * pow10<UnsignedOfSize<N / 2>>(N);
    return {quotient, remainder};
 }
-template <typename T>
+
-static inline char * out(char * p, T && obj)
+static inline char * outDigit(char * p, uint8_t value)
 {
-    memcpy(p, reinterpret_cast<const void *>(&obj), sizeof(T));
+    *p = '0' + value;
-    p += sizeof(T);
+    ++p;
    return p;
 }
-struct convert
+// Using a lookup table to convert binary numbers from 0 to 99
 // into ascii characters as described by Andrei Alexandrescu in
 // https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
 static const char digits[201] = "00010203040506070809"
                                "10111213141516171819"
                                "20212223242526272829"
                                "30313233343536373839"
                                "40414243444546474849"
                                "50515253545556575859"
                                "60616263646566676869"
                                "70717273747576777879"
                                "80818283848586878889"
                                "90919293949596979899";
 static inline char * outTwoDigits(char * p, uint8_t value)
 {
    memcpy(p, &digits[value * 2], 2);
    p += 2;
    return p;
 }
 namespace convert
 {
    template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u);
    template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u);
    //===----------------------------------------------------------===//
    //     head: find most significant digit, skip leading zeros
    //===----------------------------------------------------------===//
@ -142,35 +171,51 @@ struct convert
    // "x" contains quotient and remainder after division by 10^N
    // quotient is less than 10^N
    template <size_t N>
-    static inline char * head(char * p, QR<N> x)
+    static inline char * head(char * p, QuotientAndRemainder<N> x)
    {
-        return tail(head(p, U<N / 2>(x.q)), x.r);
+        p = head(p, UnsignedOfSize<N / 2>(x.quotient));
        p = tail(p, x.remainder);
        return p;
    }
    // "u" is less than 10^2*N
-    template <typename UInt, size_t N = sizeof(UInt)>
+    template <typename UInt, size_t N>
    static inline char * head(char * p, UInt u)
    {
-        return (u < pow10<U<N>>(N) ? (head(p, U<N / 2>(u))) : (head<N>(p, split<N>(u))));
+        return u < pow10<UnsignedOfSize<N>>(N)
            ? head(p, UnsignedOfSize<N / 2>(u))
            : head<N>(p, split<N>(u));
    }
    // recursion base case, selected when "u" is one byte
-    static inline char * head(char * p, U<1> u) { return (u < 10 ? (out<char>(p, '0' + u)) : (out(p, dd(u)))); }
+    template <>
    inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
    {
        return u < 10
            ? outDigit(p, u)
            : outTwoDigits(p, u);
    }
    //===----------------------------------------------------------===//
    //     tail: produce all digits including leading zeros
    //===----------------------------------------------------------===//
    // recursive step, "u" is less than 10^2*N
-    template <typename UInt, size_t N = sizeof(UInt)>
+    template <typename UInt, size_t N>
    static inline char * tail(char * p, UInt u)
    {
-        QR<N> x = split<N>(u);
+        QuotientAndRemainder<N> x = split<N>(u);
-        return tail(tail(p, U<N / 2>(x.q)), x.r);
+        p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
        p = tail(p, x.remainder);
        return p;
    }
    // recursion base case, selected when "u" is one byte
-    static inline char * tail(char * p, U<1> u) { return out(p, dd(u)); }
+    template <>
    inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
    {
        return outTwoDigits(p, u);
    }
    //===----------------------------------------------------------===//
    // large values are >= 10^2*N
@ -178,10 +223,13 @@ struct convert
    //===----------------------------------------------------------===//
    template <size_t N>
-    static inline char * large(char * p, QR<N> x)
+    static inline char * large(char * p, QuotientAndRemainder<N> x)
    {
-        QR<N> y = split<N>(x.q);
+        QuotientAndRemainder<N> y = split<N>(x.quotient);
-        return tail(tail(head(p, U<N / 2>(y.q)), y.r), x.r);
+        p = head(p, UnsignedOfSize<N / 2>(y.quotient));
        p = tail(p, y.remainder);
        p = tail(p, x.remainder);
        return p;
    }
    //===----------------------------------------------------------===//
@ -192,20 +240,29 @@ struct convert
    template <typename UInt, size_t N = sizeof(UInt)>
    static inline char * itoa(char * p, UInt u)
    {
-        if (u < pow10<U<N>>(N))
+        if (u < pow10<UnsignedOfSize<N>>(N))
-            return head(p, U<N / 2>(u));
+            return head(p, UnsignedOfSize<N / 2>(u));
-        QR<N> x = split<N>(u);
+        QuotientAndRemainder<N> x = split<N>(u);
-        return (u < pow10<U<N>>(2 * N) ? (head<N>(p, x)) : (large<N>(p, x)));
+
        return u < pow10<UnsignedOfSize<N>>(2 * N)
            ? head<N>(p, x)
            : large<N>(p, x);
    }
    // selected when "u" is one byte
-    static inline char * itoa(char * p, U<1> u)
+    template <>
    inline char * itoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
    {
        if (u < 10)
-            return out<char>(p, '0' + u);
+            return outDigit(p, u);
-        if (u < 100)
+        else if (u < 100)
-            return out(p, dd(u));
+            return outTwoDigits(p, u);
-        return out(out<char>(p, '0' + u / 100), dd(u % 100));
+        else
        {
            p = outDigit(p, u / 100);
            p = outTwoDigits(p, u % 100);
            return p;
        }
    }
    //===----------------------------------------------------------===//
@ -213,14 +270,14 @@ struct convert
    //===----------------------------------------------------------===//
    // itoa: handle unsigned integral operands (selected by SFINAE)
-    template <typename U, std::enable_if_t<not std::is_signed<U>::value && std::is_integral<U>::value> * = nullptr>
+    template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
    static inline char * itoa(U u, char * p)
    {
        return convert::itoa(p, u);
    }
    // itoa: handle signed integral operands (selected by SFINAE)
-    template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed<I>::value && std::is_integral<I>::value> * = nullptr>
+    template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
    static inline char * itoa(I i, char * p)
    {
        // Need "mask" to be filled with a copy of the sign bit.
@ -230,19 +287,19 @@ struct convert
        // Use a conditional expression to be portable,
        // a good optimizing compiler generates an arithmetic right shift
        // and avoids the conditional branch.
-        U<N> mask = i < 0 ? ~U<N>(0) : 0;
+        UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
-        // Now get the absolute value of "i" and cast to unsigned type U<N>.
+        // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
        // Cannot use std::abs() because the result is undefined
        // in 2's complement systems for the most-negative value.
        // Want to avoid conditional branch for performance reasons since
        // CPU branch prediction will be ineffective when negative values
        // occur randomly.
-        // Let "u" be "i" cast to unsigned type U<N>.
+        // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
        // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
        // This yields the absolute value with the desired type without
        // using a conditional branch and without invoking undefined or
        // implementation defined behavior:
-        U<N> u = ((2 * U<N>(i)) & ~mask) - U<N>(i);
+        UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
        // Unconditionally store a minus sign when producing digits
        // in a forward direction and increment the pointer only if
        // the value is in fact negative.
@ -298,12 +355,12 @@ static inline char * writeUIntText(uint128_t x, char * p)
        const auto i = x % 100;
        x /= 100;
        pp -= 2;
-        unalignedStore(pp, dd(i));
+        outTwoDigits(pp, i);
    }
    if (x < 10)
        *p = '0' + x;
    else
-        unalignedStore(p, dd(x));
+        outTwoDigits(p, x);
    return p + len;
 }
@ -313,9 +370,9 @@ static inline char * writeLeadingMinus(char * pos)
    return pos + 1;
 }
-static inline char * writeSIntText(__int128 x, char * pos)
+static inline char * writeSIntText(int128_t x, char * pos)
 {
-    static const __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
+    static const int128_t min_int128 = int128_t(0x8000000000000000ll) << 64;
    if (unlikely(x == min_int128))
    {
@ -328,7 +385,7 @@ static inline char * writeSIntText(__int128 x, char * pos)
        x = -x;
        pos = writeLeadingMinus(pos);
    }
-    return writeUIntText(static_cast<unsigned __int128>(x), pos);
+    return writeUIntText(static_cast<uint128_t>(x), pos);
 }
 }
@ -339,12 +396,14 @@ char * itoa(I i, char * p)
    return impl::convert::itoa(i, p);
 }
-static inline char * itoa(uint128_t i, char * p)
+template <>
 inline char * itoa<uint128_t>(uint128_t i, char * p)
 {
    return impl::writeUIntText(i, p);
 }
-static inline char * itoa(__int128 i, char * p)
+template <>
 inline char * itoa<int128_t>(int128_t i, char * p)
 {
    return impl::writeSIntText(i, p);
 }
		`@ -0,0 +1 @@`
							`Column 11, name: c11, type: UInt8, ERROR: text "a" is not like UInt8`