Merge branch 'master' into operator

This commit is contained in:
flynn 2023-07-25 11:21:40 +08:00 committed by GitHub
commit 059025f452
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 767 additions and 521 deletions

View File

@ -57,7 +57,7 @@ public:
URI(); URI();
/// Creates an empty URI. /// Creates an empty URI.
explicit URI(const std::string & uri); explicit URI(const std::string & uri, bool disable_url_encoding = false);
/// Parses an URI from the given string. Throws a /// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid. /// SyntaxException if the uri is not valid.
@ -350,6 +350,10 @@ protected:
static const std::string ILLEGAL; static const std::string ILLEGAL;
private: private:
void encodePath(std::string & encodedStr) const;
void decodePath(const std::string & encodedStr);
std::string _scheme; std::string _scheme;
std::string _userInfo; std::string _userInfo;
std::string _host; std::string _host;
@ -357,6 +361,8 @@ private:
std::string _path; std::string _path;
std::string _query; std::string _query;
std::string _fragment; std::string _fragment;
bool _disable_url_encoding = false;
}; };

View File

@ -36,8 +36,8 @@ URI::URI():
} }
URI::URI(const std::string& uri): URI::URI(const std::string& uri, bool decode_and_encode_path):
_port(0) _port(0), _disable_url_encoding(decode_and_encode_path)
{ {
parse(uri); parse(uri);
} }
@ -107,7 +107,8 @@ URI::URI(const URI& uri):
_port(uri._port), _port(uri._port),
_path(uri._path), _path(uri._path),
_query(uri._query), _query(uri._query),
_fragment(uri._fragment) _fragment(uri._fragment),
_disable_url_encoding(uri._disable_url_encoding)
{ {
} }
@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_port(baseURI._port), _port(baseURI._port),
_path(baseURI._path), _path(baseURI._path),
_query(baseURI._query), _query(baseURI._query),
_fragment(baseURI._fragment) _fragment(baseURI._fragment),
_disable_url_encoding(baseURI._disable_url_encoding)
{ {
resolve(relativeURI); resolve(relativeURI);
} }
@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path; _path = uri._path;
_query = uri._query; _query = uri._query;
_fragment = uri._fragment; _fragment = uri._fragment;
_disable_url_encoding = uri._disable_url_encoding;
} }
return *this; return *this;
} }
@ -181,6 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path); std::swap(_path, uri._path);
std::swap(_query, uri._query); std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment); std::swap(_fragment, uri._fragment);
std::swap(_disable_url_encoding, uri._disable_url_encoding);
} }
@ -201,7 +205,7 @@ std::string URI::toString() const
std::string uri; std::string uri;
if (isRelative()) if (isRelative())
{ {
encode(_path, RESERVED_PATH, uri); encodePath(uri);
} }
else else
{ {
@ -217,7 +221,7 @@ std::string URI::toString() const
{ {
if (!auth.empty() && _path[0] != '/') if (!auth.empty() && _path[0] != '/')
uri += '/'; uri += '/';
encode(_path, RESERVED_PATH, uri); encodePath(uri);
} }
else if (!_query.empty() || !_fragment.empty()) else if (!_query.empty() || !_fragment.empty())
{ {
@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority)
void URI::setPath(const std::string& path) void URI::setPath(const std::string& path)
{ {
_path.clear(); _path.clear();
decode(path, _path); decodePath(path);
} }
@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc)
std::string URI::getPathEtc() const std::string URI::getPathEtc() const
{ {
std::string pathEtc; std::string pathEtc;
encode(_path, RESERVED_PATH, pathEtc); encodePath(pathEtc);
if (!_query.empty()) if (!_query.empty())
{ {
pathEtc += '?'; pathEtc += '?';
@ -436,7 +440,7 @@ std::string URI::getPathEtc() const
std::string URI::getPathAndQuery() const std::string URI::getPathAndQuery() const
{ {
std::string pathAndQuery; std::string pathAndQuery;
encode(_path, RESERVED_PATH, pathAndQuery); encodePath(pathAndQuery);
if (!_query.empty()) if (!_query.empty())
{ {
pathAndQuery += '?'; pathAndQuery += '?';
@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
} }
} }
/// Appends the path component of this URI to `encodedStr`.
///
/// When URL encoding is disabled the stored path is appended verbatim; otherwise it is
/// percent-encoded with the RESERVED_PATH character set via encode().
///
/// The output string is extended, never overwritten: callers such as toString() pass a
/// string that may already hold the scheme/authority prefix (and a leading '/'), so
/// assigning to it would silently drop everything written so far.
void URI::encodePath(std::string & encodedStr) const
{
    if (_disable_url_encoding)
        encodedStr += _path;
    else
        encode(_path, RESERVED_PATH, encodedStr);
}
/// Stores the path given in `encodedStr` into `_path`.
///
/// When URL encoding is disabled the text is taken verbatim; otherwise it is
/// percent-decoded via decode().
///
/// Both branches extend `_path` so that the two modes behave the same way; callers that
/// want to replace the path clear it first (setPath() does so before calling this).
/// NOTE(review): this relies on decode() appending to its output argument, as in
/// upstream Poco — confirm against the decode() implementation in this file.
void URI::decodePath(const std::string & encodedStr)
{
    if (_disable_url_encoding)
        _path += encodedStr;
    else
        decode(encodedStr, _path);
}
bool URI::isWellKnownPort() const bool URI::isWellKnownPort() const
{ {
@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
{ {
std::string path; std::string path;
while (it != end && *it != '?' && *it != '#') path += *it++; while (it != end && *it != '?' && *it != '#') path += *it++;
decode(path, _path); decodePath(path);
} }

View File

@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
## Storage Settings {#storage-settings} ## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default.

View File

@ -3468,6 +3468,12 @@ Possible values:
Default value: `0`. Default value: `0`.
## disable_url_encoding {#disable_url_encoding}
Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
Disabled by default.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.

View File

@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Storage Settings {#storage-settings} ## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default.
**See Also** **See Also**

View File

@ -623,6 +623,7 @@ class IColumn;
M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \
M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \
M(Bool, disable_url_encoding, false, " Allows to disable decoding/encoding path in uri in URL table engine", 0) \
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \
M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \

View File

@ -1183,15 +1183,9 @@ public:
|| (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size()) || (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size())
|| (arguments[0]->equals(*arguments[1])))) || (arguments[0]->equals(*arguments[1]))))
{ {
try if (!tryGetLeastSupertype(arguments))
{
getLeastSupertype(arguments);
}
catch (const Exception &)
{
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types of arguments ({}, {})" throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types of arguments ({}, {})"
" of function {}", arguments[0]->getName(), arguments[1]->getName(), getName()); " of function {}", arguments[0]->getName(), arguments[1]->getName(), getName());
}
} }
if (left_tuple && right_tuple) if (left_tuple && right_tuple)

View File

@ -0,0 +1,376 @@
#include <Functions/GregorianDate.h>
#include <Common/Exception.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
namespace DB
{
/// Error codes passed to the exceptions thrown in this file.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_FORMAT_DATETIME;
extern const int LOGICAL_ERROR;
}
namespace
{
/// Leap-year test: a year divisible by 4 is a leap year, unless it is divisible
/// by 100 without also being divisible by 400.
inline constexpr bool is_leap_year(int32_t year)
{
    if (year % 4 != 0)
        return false;
    if (year % 100 != 0)
        return true;
    return year % 400 == 0;
}
/// Number of days in the given month (1 = January ... 12 = December).
/// February has 29 days when `is_leap_year` is set, 28 otherwise.
/// Any month outside 1..12 terminates the program.
inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
{
    constexpr uint8_t common_year_lengths[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

    if (month < 1 || month > 12)
        std::terminate();

    if (month == 2 && is_leap_year)
        return 29;

    return common_year_lengths[month - 1];
}
/** Floor division: the quotient is rounded toward negative infinity
  * instead of toward zero. The divisor is converted to the dividend's type first.
  */
template <typename I, typename J>
inline constexpr I div(I x, J y)
{
    const I divisor = static_cast<I>(y);
    const bool signs_differ = (x > 0 && divisor < 0) || (x < 0 && divisor > 0);

    /// Same sign (or zero dividend): built-in truncation already equals the floor.
    if (!signs_differ)
        return x / divisor;

    /// Opposite signs: move the dividend one step toward zero, truncate, then step down by one.
    const I nudged = x > 0 ? x - 1 : x + 1;
    return nudged / divisor - 1;
}
/** Remainder that pairs with div(): div(x, y) * y + mod(x, y) == x.
  * A non-zero result therefore takes the sign of the divisor.
  */
template <typename I, typename J>
inline constexpr I mod(I x, J y)
{
    const I divisor = static_cast<I>(y);
    const auto remainder = x % divisor;
    const bool signs_differ = (x > 0 && divisor < 0) || (x < 0 && divisor > 0);

    /// The built-in remainder follows the dividend's sign; shift it by one divisor
    /// when the operands disagree in sign and the division is not exact.
    if (signs_differ && remainder != 0)
        return remainder + divisor;

    return remainder;
}
/** Smaller of two values whose types may differ.
  * The second operand is converted to the type of the first, which is also the result type.
  */
template <typename I, typename J>
inline constexpr I min(I x, J y)
{
    const I other = static_cast<I>(y);
    if (x < other)
        return x;
    return other;
}
/// Reads one character from `in` and returns its numeric value (0..9).
/// Throws CANNOT_PARSE_INPUT_ASSERTION_FAILED when no character can be read
/// or when the character is not a decimal digit.
inline char readDigit(ReadBuffer & in)
{
    char symbol;

    if (!in.read(symbol))
        throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");

    const bool is_digit = symbol >= '0' && symbol <= '9';
    if (!is_digit)
        throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");

    return symbol - '0';
}
/// Non-throwing counterpart of readDigit().
/// On success stores the numeric value (0..9) in `c` and returns true.
/// Returns false when nothing could be read or the character is not a decimal digit;
/// in the latter case `c` holds the raw character that was read.
inline bool tryReadDigit(ReadBuffer & in, char & c)
{
    if (!in.read(c))
        return false;

    if (c < '0' || c > '9')
        return false;

    c -= '0';
    return true;
}
}
/// Parses a date in the exact text form 'YYYY-MM-DD' from `in` and stores it in this object.
///
/// The whole buffer must be consumed: anything after the day digits is an error.
/// Throws (via readDigit / assertChar / assertEOF) when the text does not match the format,
/// and CANNOT_PARSE_DATE when the month or the day of month is out of range.
void GregorianDate::init(ReadBuffer & in)
{
    /// Reads `count` decimal digits, most significant first.
    /// The digits are consumed one statement at a time on purpose: combining several
    /// readDigit(in) calls in a single arithmetic expression leaves the order in which
    /// they read from the buffer unspecified.
    const auto read_number = [&in](int count)
    {
        int value = 0;
        for (int i = 0; i < count; ++i)
            value = value * 10 + readDigit(in);
        return value;
    };

    year_ = read_number(4);

    assertChar('-', in);

    month_ = static_cast<uint8_t>(read_number(2));

    assertChar('-', in);

    day_of_month_ = static_cast<uint8_t>(read_number(2));

    assertEOF(in);

    /// The month is checked before monthLength() is called, because monthLength()
    /// terminates the program on a month outside 1..12.
    if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).",
            year_, month_, day_of_month_);
}
/// Non-throwing parse of a date in the exact text form 'YYYY-MM-DD'.
/// Returns false when the text does not match the format, when the buffer is not
/// fully consumed, or when the month / day of month is out of range.
/// The fields are assigned only after the whole text has been read successfully;
/// they are assigned before the range check, so a false result from that check
/// still leaves the parsed values stored.
bool GregorianDate::tryInit(ReadBuffer & in)
{
    char digits[8];
    int filled = 0;

    /// Reads the next `count` digits into `digits`, stopping at the first failure.
    const auto read_digits = [&](int count)
    {
        for (int i = 0; i < count; ++i)
        {
            if (!tryReadDigit(in, digits[filled]))
                return false;
            ++filled;
        }
        return true;
    };

    const bool well_formed = read_digits(4)
        && checkChar('-', in)
        && read_digits(2)
        && checkChar('-', in)
        && read_digits(2)
        && in.eof();

    if (!well_formed)
        return false;

    year_ = digits[0] * 1000 + digits[1] * 100 + digits[2] * 10 + digits[3];
    month_ = digits[4] * 10 + digits[5];
    day_of_month_ = digits[6] * 10 + digits[7];

    /// The month is validated first so that monthLength() is never called with an invalid month.
    return month_ >= 1 && month_ <= 12
        && day_of_month_ >= 1 && day_of_month_ <= monthLength(is_leap_year(year_), month_);
}
/// Constructs the date from text read from `in`.
/// All parsing and validation is delegated to init(ReadBuffer &), which throws on invalid input.
GregorianDate::GregorianDate(ReadBuffer & in)
{
init(in);
}
void GregorianDate::init(int64_t modified_julian_day)
{
const OrdinalDate ord(modified_julian_day);
const MonthDay md(is_leap_year(ord.year()), ord.dayOfYear());
year_ = ord.year();
month_ = md.month();
day_of_month_ = md.dayOfMonth();
}
bool GregorianDate::tryInit(int64_t modified_julian_day)
{
OrdinalDate ord;
if (!ord.tryInit(modified_julian_day))
return false;
MonthDay md(is_leap_year(ord.year()), ord.dayOfYear());
year_ = ord.year();
month_ = md.month();
day_of_month_ = md.dayOfMonth();
return true;
}
/// Constructs the date from a Modified Julian Day number.
/// Delegates to init(int64_t), which throws when the value is out of the supported range.
GregorianDate::GregorianDate(int64_t modified_julian_day)
{
init(modified_julian_day);
}
int64_t GregorianDate::toModifiedJulianDay() const
{
const MonthDay md(month_, day_of_month_);
const auto day_of_year = md.dayOfYear(is_leap_year(year_));
const OrdinalDate ord(year_, day_of_year);
return ord.toModifiedJulianDay();
}
bool GregorianDate::tryToModifiedJulianDay(int64_t & res) const
{
const MonthDay md(month_, day_of_month_);
const auto day_of_year = md.dayOfYear(is_leap_year(year_));
OrdinalDate ord;
if (!ord.tryInit(year_, day_of_year))
return false;
res = ord.toModifiedJulianDay();
return true;
}
/// Writes the date to `buf` in the text form 'YYYY-MM-DD'.
/// ReturnType selects the error-reporting style for a year outside 0..9999:
///  - void: throws CANNOT_FORMAT_DATETIME;
///  - bool: returns false without writing anything.
/// On success the bool flavour returns true.
template <typename ReturnType>
ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const
{
    const bool year_fits_four_digits = year_ >= 0 && year_ <= 9999;
    if (!year_fits_four_digits)
    {
        if constexpr (std::is_same_v<ReturnType, void>)
            throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
                "Impossible to stringify: year too big or small: {}", year_);
        else
            return false;
    }

    /// Zero-padded two-digit field (month, day of month).
    const auto write_two_digits = [&buf](auto value)
    {
        writeChar('0' + value / 10, buf);
        writeChar('0' + value % 10, buf);
    };

    /// Zero-padded four-digit year, most significant digit first.
    writeChar('0' + year_ / 1000, buf);
    writeChar('0' + year_ / 100 % 10, buf);
    writeChar('0' + year_ / 10 % 10, buf);
    writeChar('0' + year_ % 10, buf);

    writeChar('-', buf);
    write_two_digits(month_);

    writeChar('-', buf);
    write_two_digits(day_of_month_);

    return ReturnType(true);
}
/// Returns the date as text by running write() into an in-memory buffer.
/// Any exception raised by write() propagates to the caller.
std::string GregorianDate::toString() const
{
WriteBufferFromOwnString buf;
write(buf);
return buf.str();
}
/// Sets the ordinal date from a year and a 1-based day of year.
/// Throws LOGICAL_ERROR when the day is outside 1..365 (1..366 in a leap year).
/// The fields are stored before the check, so they hold the given values even when it throws.
void OrdinalDate::init(int32_t year, uint16_t day_of_year)
{
    year_ = year;
    day_of_year_ = day_of_year;

    const int days_in_year = is_leap_year(year) ? 366 : 365;
    const bool in_range = day_of_year >= 1 && day_of_year <= days_in_year;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", year, day_of_year);
}
/// Non-throwing variant of init(year, day_of_year).
/// Stores both values unconditionally and returns whether the day lies within
/// 1..365 (1..366 in a leap year).
bool OrdinalDate::tryInit(int32_t year, uint16_t day_of_year)
{
    year_ = year;
    day_of_year_ = day_of_year;

    const int days_in_year = is_leap_year(year) ? 366 : 365;
    return day_of_year >= 1 && day_of_year <= days_in_year;
}
/// Sets the ordinal date from a Modified Julian Day number.
/// Throws CANNOT_FORMAT_DATETIME when tryInit() rejects the value as out of range.
void OrdinalDate::init(int64_t modified_julian_day)
{
    if (tryInit(modified_julian_day))
        return;

    throw Exception(
        ErrorCodes::CANNOT_FORMAT_DATETIME,
        "Value cannot be represented as date because it's out of range");
}
/// Converts a Modified Julian Day number into year + day of year.
/// Returns false, leaving the fields untouched, when the value is outside the supported range.
bool OrdinalDate::tryInit(int64_t modified_julian_day)
{
    /// Supported day numbers: -678941 (0000-01-01) through 2973119 (9999-12-31).
    constexpr int64_t first_supported_day = -678941;
    constexpr int64_t last_supported_day = 2973119;
    if (modified_julian_day < first_supported_day || modified_julian_day > last_supported_day)
        return false;

    /// Peel off 400-year, 100-year, 4-year and 1-year spans in turn.
    /// The century and year counts are capped at 3 so that the extra leap day at the end
    /// of a 400-year / 4-year span stays within the last century / year of that span.
    const auto shifted = modified_julian_day + 678575;
    const auto spans_400 = div(shifted, 146097);
    const auto rest_400 = mod(shifted, 146097);
    const auto centuries = min(div(rest_400, 36524), 3);
    const auto rest_100 = rest_400 - centuries * 36524;
    const auto spans_4 = div(rest_100, 1461);
    const auto rest_4 = mod(rest_100, 1461);
    const auto years = min(div(rest_4, 365), 3);

    day_of_year_ = rest_4 - years * 365 + 1;
    year_ = static_cast<int32_t>(spans_400 * 400 + centuries * 100 + spans_4 * 4 + years + 1);
    return true;
}
/// Constructs from a year and a 1-based day of year.
/// Delegates to init(year, day_of_year), which throws when the day of year is invalid.
OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year)
{
init(year, day_of_year);
}
/// Constructs from a Modified Julian Day number.
/// Delegates to init(int64_t), which throws when the value is out of the supported range.
OrdinalDate::OrdinalDate(int64_t modified_julian_day)
{
init(modified_julian_day);
}
/// Converts the stored year + day of year into a Modified Julian Day number.
/// Counts 365 days for every preceding year, corrects for leap years with the
/// /4, /100, /400 terms, and then shifts the total by the constant 678576.
int64_t OrdinalDate::toModifiedJulianDay() const noexcept
{
    const auto preceding_years = year_ - 1;

    auto days = day_of_year_ + 365 * preceding_years;
    days += div(preceding_years, 4);
    days -= div(preceding_years, 100);
    days += div(preceding_years, 400);
    days -= 678576;
    return days;
}
/// Constructs from an explicit month (1..12) and day of month.
/// Throws LOGICAL_ERROR when the month is out of range.
MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
    : month_(month)
    , day_of_month_(day_of_month)
{
    const bool month_in_range = month >= 1 && month <= 12;
    if (!month_in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", month);

    /// The day of month is deliberately left unchecked here: its upper bound depends on
    /// whether the year is a leap year, which is not known at this point.
}
/// Constructs from a 1-based day of year, splitting it into month and day of month.
/// Throws LOGICAL_ERROR when the day is outside 1..365 (1..366 when `is_leap_year` is set).
MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
{
    const int days_in_year = is_leap_year ? 366 : 365;
    if (day_of_year < 1 || day_of_year > days_in_year)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
                        (is_leap_year ? "leap, " : "non-leap, "), day_of_year);

    /// Walk through the months, subtracting each full month that fits before the given day.
    month_ = 1;
    uint16_t remaining = day_of_year;
    for (auto length = monthLength(is_leap_year, month_); remaining > length; length = monthLength(is_leap_year, month_))
    {
        ++month_;
        remaining -= length;
    }

    day_of_month_ = remaining;
}
/// Converts the stored month and day of month into a 1-based day of year.
/// Throws LOGICAL_ERROR when the day of month does not exist in that month
/// for the given kind of year.
uint16_t MonthDay::dayOfYear(bool is_leap_year) const
{
    const bool day_is_valid = day_of_month_ >= 1 && day_of_month_ <= monthLength(is_leap_year, month_);
    if (!day_is_valid)
    {
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
                        (is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_);
    }

    /// Correction applied from March onwards; it differs between leap and non-leap years.
    int february_correction = 0;
    if (month_ > 2)
        february_correction = is_leap_year ? -1 : -2;

    return (367 * month_ - 362) / 12 + february_correction + day_of_month_;
}
/// Explicit instantiations of writeImpl for the two supported return types
/// (void: throwing flavour, bool: non-throwing flavour), since the template is defined in this file.
template void GregorianDate::writeImpl<void>(WriteBuffer & buf) const;
template bool GregorianDate::writeImpl<bool>(WriteBuffer & buf) const;
}

View File

@ -1,408 +1,155 @@
#pragma once #pragma once
#include <base/extended_types.h>
#include <Common/Exception.h>
#include <Core/Types.h> #include <Core/Types.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <cstdint>
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_FORMAT_DATETIME;
extern const int LOGICAL_ERROR;
}
/** Proleptic Gregorian calendar date. YearT is an integral type class ReadBuffer;
class WriteBuffer;
/// Proleptic Gregorian calendar date.
class GregorianDate
{
public:
GregorianDate() {}
void init(ReadBuffer & in);
bool tryInit(ReadBuffer & in);
/** Construct from date in text form 'YYYY-MM-DD' by reading from
* ReadBuffer.
*/
explicit GregorianDate(ReadBuffer & in);
void init(int64_t modified_julian_day);
bool tryInit(int64_t modified_julian_day);
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably signed.
*/
explicit GregorianDate(int64_t modified_julian_day);
/** Convert to Modified Julian Day. The type T is an integral type
* which should be at least 32 bits wide, and should preferably * which should be at least 32 bits wide, and should preferably
* be signed. * signed.
*/ */
template <typename YearT = int32_t> int64_t toModifiedJulianDay() const;
class GregorianDate bool tryToModifiedJulianDay(int64_t & res) const;
/** Write the date in text form 'YYYY-MM-DD' to a buffer.
*/
void write(WriteBuffer & buf) const
{ {
public: writeImpl<void>(buf);
/** Construct from date in text form 'YYYY-MM-DD' by reading from }
* ReadBuffer.
*/
explicit GregorianDate(ReadBuffer & in);
/** Construct from Modified Julian Day. The type T is an bool tryWrite(WriteBuffer & buf) const
* integral type which should be at least 32 bits wide, and
* should preferably signed.
*/
explicit GregorianDate(is_integer auto modified_julian_day);
/** Convert to Modified Julian Day. The type T is an integral type
* which should be at least 32 bits wide, and should preferably
* signed.
*/
template <is_integer T>
T toModifiedJulianDay() const;
/** Write the date in text form 'YYYY-MM-DD' to a buffer.
*/
void write(WriteBuffer & buf) const;
/** Convert to a string in text form 'YYYY-MM-DD'.
*/
std::string toString() const;
YearT year() const noexcept
{
return year_;
}
uint8_t month() const noexcept
{
return month_;
}
uint8_t day_of_month() const noexcept /// NOLINT
{
return day_of_month_;
}
private:
YearT year_; /// NOLINT
uint8_t month_; /// NOLINT
uint8_t day_of_month_; /// NOLINT
};
/** ISO 8601 Ordinal Date. YearT is an integral type which should
* be at least 32 bits wide, and should preferably signed.
*/
template <typename YearT = int32_t>
class OrdinalDate
{ {
public: return writeImpl<bool>(buf);
OrdinalDate(YearT year, uint16_t day_of_year); }
/** Construct from Modified Julian Day. The type T is an /** Convert to a string in text form 'YYYY-MM-DD'.
* integral type which should be at least 32 bits wide, and */
* should preferably signed. std::string toString() const;
*/
template <is_integer DayT>
explicit OrdinalDate(DayT modified_julian_day);
/** Convert to Modified Julian Day. The type T is an integral int32_t year() const noexcept
* type which should be at least 32 bits wide, and should
* preferably be signed.
*/
template <is_integer T>
T toModifiedJulianDay() const noexcept;
YearT year() const noexcept
{
return year_;
}
uint16_t dayOfYear() const noexcept
{
return day_of_year_;
}
private:
YearT year_; /// NOLINT
uint16_t day_of_year_; /// NOLINT
};
class MonthDay
{ {
public: return year_;
/** Construct from month and day. */ }
MonthDay(uint8_t month, uint8_t day_of_month);
/** Construct from day of year in Gregorian or Julian uint8_t month() const noexcept
* calendars to month and day. {
*/ return month_;
MonthDay(bool is_leap_year, uint16_t day_of_year); }
/** Convert month and day in Gregorian or Julian calendars to uint8_t dayOfMonth() const noexcept
* day of year. {
*/ return day_of_month_;
uint16_t dayOfYear(bool is_leap_year) const; }
uint8_t month() const noexcept private:
{ int32_t year_ = 0;
return month_; uint8_t month_ = 0;
} uint8_t day_of_month_ = 0;
uint8_t day_of_month() const noexcept /// NOLINT template <typename ReturnType>
{ ReturnType writeImpl(WriteBuffer & buf) const;
return day_of_month_; };
}
private: /** ISO 8601 Ordinal Date.
uint8_t month_; /// NOLINT */
uint8_t day_of_month_; /// NOLINT class OrdinalDate
};
}
/* Implementation */
namespace gd
{ {
using namespace DB; public:
OrdinalDate() {}
template <typename YearT> void init(int32_t year, uint16_t day_of_year);
static inline constexpr bool is_leap_year(YearT year) bool tryInit(int32_t year, uint16_t day_of_year);
{
return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
}
static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) void init(int64_t modified_julian_day);
{ bool tryInit(int64_t modified_julian_day);
switch (month)
{
case 1: return 31;
case 2: return is_leap_year ? 29 : 28;
case 3: return 31;
case 4: return 30;
case 5: return 31;
case 6: return 30;
case 7: return 31;
case 8: return 31;
case 9: return 30;
case 10: return 31;
case 11: return 30;
case 12: return 31;
default:
std::terminate();
}
}
/** Integer division truncated toward negative infinity. OrdinalDate(int32_t year, uint16_t day_of_year);
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably signed.
*/ */
template <typename I, typename J> explicit OrdinalDate(int64_t modified_julian_day);
static inline constexpr I div(I x, J y)
{
const auto y_cast = static_cast<I>(y);
if (x > 0 && y_cast < 0)
return ((x - 1) / y_cast) - 1;
else if (x < 0 && y_cast > 0)
return ((x + 1) / y_cast) - 1;
else
return x / y_cast;
}
/** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. /** Convert to Modified Julian Day. The type T is an integral
* type which should be at least 32 bits wide, and should
* preferably be signed.
*/ */
template <typename I, typename J> int64_t toModifiedJulianDay() const noexcept;
static inline constexpr I mod(I x, J y)
int32_t year() const noexcept
{ {
const auto y_cast = static_cast<I>(y); return year_;
const auto r = x % y_cast;
if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
return r == 0 ? static_cast<I>(0) : r + y_cast;
else
return r;
} }
/** Like std::min(), but the type of operands may differ. uint16_t dayOfYear() const noexcept
*/
template <typename I, typename J>
static inline constexpr I min(I x, J y)
{ {
const auto y_cast = static_cast<I>(y); return day_of_year_;
return x < y_cast ? x : y_cast;
} }
static inline char readDigit(ReadBuffer & in) private:
{ int32_t year_ = 0;
char c; uint16_t day_of_year_ = 0;
if (!in.read(c)) };
throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
else if (c < '0' || c > '9')
throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
else
return c - '0';
}
}
namespace DB class MonthDay
{ {
template <typename YearT> public:
GregorianDate<YearT>::GregorianDate(ReadBuffer & in) /** Construct from month and day. */
MonthDay(uint8_t month, uint8_t day_of_month);
/** Construct from day of year in Gregorian or Julian
* calendars to month and day.
*/
MonthDay(bool is_leap_year, uint16_t day_of_year);
/** Convert month and day in Gregorian or Julian calendars to
* day of year.
*/
uint16_t dayOfYear(bool is_leap_year) const;
uint8_t month() const noexcept
{ {
year_ = gd::readDigit(in) * 1000 return month_;
+ gd::readDigit(in) * 100
+ gd::readDigit(in) * 10
+ gd::readDigit(in);
assertChar('-', in);
month_ = gd::readDigit(in) * 10
+ gd::readDigit(in);
assertChar('-', in);
day_of_month_ = gd::readDigit(in) * 10
+ gd::readDigit(in);
assertEOF(in);
if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString());
} }
template <typename YearT> uint8_t dayOfMonth() const noexcept
GregorianDate<YearT>::GregorianDate(is_integer auto modified_julian_day)
{ {
const OrdinalDate<YearT> ord(modified_julian_day); return day_of_month_;
const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
year_ = ord.year();
month_ = md.month();
day_of_month_ = md.day_of_month();
} }
template <typename YearT> private:
template <is_integer T> uint8_t month_ = 0;
T GregorianDate<YearT>::toModifiedJulianDay() const uint8_t day_of_month_ = 0;
{ };
const MonthDay md(month_, day_of_month_);
const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
const OrdinalDate<YearT> ord(year_, day_of_year);
return ord.template toModifiedJulianDay<T>();
}
template <typename YearT>
void GregorianDate<YearT>::write(WriteBuffer & buf) const
{
if (year_ < 0 || year_ > 9999)
{
throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
"Impossible to stringify: year too big or small: {}", DB::toString(year_));
}
else
{
auto y = year_;
writeChar('0' + y / 1000, buf); y %= 1000;
writeChar('0' + y / 100, buf); y %= 100;
writeChar('0' + y / 10, buf); y %= 10;
writeChar('0' + y , buf);
writeChar('-', buf);
auto m = month_;
writeChar('0' + m / 10, buf); m %= 10;
writeChar('0' + m , buf);
writeChar('-', buf);
auto d = day_of_month_;
writeChar('0' + d / 10, buf); d %= 10;
writeChar('0' + d , buf);
}
}
template <typename YearT>
std::string GregorianDate<YearT>::toString() const
{
WriteBufferFromOwnString buf;
write(buf);
return buf.str();
}
template <typename YearT>
OrdinalDate<YearT>::OrdinalDate(YearT year, uint16_t day_of_year)
: year_(year)
, day_of_year_(day_of_year)
{
if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365))
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year));
}
}
template <typename YearT>
template <is_integer DayT>
OrdinalDate<YearT>::OrdinalDate(DayT modified_julian_day)
{
/// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
if constexpr (is_signed_v<DayT> && std::numeric_limits<DayT>::lowest() < -678941)
if (modified_julian_day < -678941)
throw Exception(
ErrorCodes::CANNOT_FORMAT_DATETIME,
"Value cannot be represented as date because it's out of range");
if constexpr (std::numeric_limits<DayT>::max() > 2973119)
if (modified_julian_day > 2973119)
throw Exception(
ErrorCodes::CANNOT_FORMAT_DATETIME,
"Value cannot be represented as date because it's out of range");
const auto a = modified_julian_day + 678575;
const auto quad_cent = gd::div(a, 146097);
const auto b = gd::mod(a, 146097);
const auto cent = gd::min(gd::div(b, 36524), 3);
const auto c = b - cent * 36524;
const auto quad = gd::div(c, 1461);
const auto d = gd::mod(c, 1461);
const auto y = gd::min(gd::div(d, 365), 3);
day_of_year_ = d - y * 365 + 1;
year_ = static_cast<YearT>(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
}
template <typename YearT>
template <is_integer T>
T OrdinalDate<YearT>::toModifiedJulianDay() const noexcept
{
const auto y = year_ - 1;
return day_of_year_
+ 365 * y
+ gd::div(y, 4)
- gd::div(y, 100)
+ gd::div(y, 400)
- 678576;
}
inline MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
: month_(month)
, day_of_month_(day_of_month)
{
if (month < 1 || month > 12)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month));
/* We can't validate day_of_month here, because we don't know if
* it's a leap year. */
}
inline MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
{
if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
(is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year));
month_ = 1;
uint16_t d = day_of_year;
while (true)
{
const auto len = gd::monthLength(is_leap_year, month_);
if (d <= len)
break;
month_++;
d -= len;
}
day_of_month_ = d;
}
inline uint16_t MonthDay::dayOfYear(bool is_leap_year) const
{
if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
(is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_));
}
const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2;
return (367 * month_ - 362) / 12 + k + day_of_month_;
}
} }

View File

@ -13,12 +13,12 @@
#include <IO/WriteBufferFromVector.h> #include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int CANNOT_FORMAT_DATETIME;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
} }
@ -56,25 +56,14 @@ namespace DB
{ {
if constexpr (nullOnErrors) if constexpr (nullOnErrors)
{ {
try GregorianDate gd;
{ (*vec_null_map_to)[i] = !(gd.tryInit(vec_from[i]) && gd.tryWrite(write_buffer));
const GregorianDate<> gd(vec_from[i]);
gd.write(write_buffer);
(*vec_null_map_to)[i] = false;
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_FORMAT_DATETIME)
(*vec_null_map_to)[i] = true;
else
throw;
}
writeChar(0, write_buffer); writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count(); offsets_to[i] = write_buffer.count();
} }
else else
{ {
const GregorianDate<> gd(vec_from[i]); GregorianDate gd(vec_from[i]);
gd.write(write_buffer); gd.write(write_buffer);
writeChar(0, write_buffer); writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count(); offsets_to[i] = write_buffer.count();

View File

@ -65,15 +65,7 @@ private:
if (!arg_string) if (!arg_string)
return argument.type; return argument.type;
try return DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
{
DataTypePtr type = DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
return type;
}
catch (const DB::Exception &)
{
return argument.type;
}
} }
}; };

View File

@ -398,7 +398,7 @@ namespace
static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_) static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_)
{ {
if (!isDayOfYearValid(year_, day_of_year_)) if (!isDayOfYearValid(year_, day_of_year_))
throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_); throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, out of range (year: {} day of year: {})", year_, day_of_year_);
Int32 res = daysSinceEpochFromDate(year_, 1, 1); Int32 res = daysSinceEpochFromDate(year_, 1, 1);
res += day_of_year_ - 1; res += day_of_year_ - 1;
@ -408,7 +408,7 @@ namespace
static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_) static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_)
{ {
if (!isDateValid(year_, month_, day_)) if (!isDateValid(year_, month_, day_))
throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_); throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, out of range (year: {} month: {} day_of_month: {})", year_, month_, day_);
Int32 res = cumulativeYearDays[year_ - 1970]; Int32 res = cumulativeYearDays[year_ - 1970];
res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1]; res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1];

View File

@ -17,8 +17,6 @@ namespace DB
{ {
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_DATE;
} }
template <typename Name, typename ToDataType, bool nullOnErrors> template <typename Name, typename ToDataType, bool nullOnErrors>
@ -78,27 +76,18 @@ namespace DB
if constexpr (nullOnErrors) if constexpr (nullOnErrors)
{ {
try GregorianDate date;
{
const GregorianDate<> date(read_buffer); int64_t res = 0;
vec_to[i] = date.toModifiedJulianDay<typename ToDataType::FieldType>(); bool success = date.tryInit(read_buffer) && date.tryToModifiedJulianDay(res);
vec_null_map_to[i] = false;
} vec_to[i] = static_cast<typename ToDataType::FieldType>(res);
catch (const Exception & e) vec_null_map_to[i] = !success;
{
if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || e.code() == ErrorCodes::CANNOT_PARSE_DATE)
{
vec_to[i] = static_cast<Int32>(0);
vec_null_map_to[i] = true;
}
else
throw;
}
} }
else else
{ {
const GregorianDate<> date(read_buffer); const GregorianDate date(read_buffer);
vec_to[i] = date.toModifiedJulianDay<typename ToDataType::FieldType>(); vec_to[i] = static_cast<typename ToDataType::FieldType>(date.toModifiedJulianDay());
} }
} }

View File

@ -156,7 +156,7 @@ namespace
{ {
initialize(arguments, result_type); initialize(arguments, result_type);
const auto * in = arguments.front().column.get(); const auto * in = arguments[0].column.get();
if (isColumnConst(*in)) if (isColumnConst(*in))
return executeConst(arguments, result_type, input_rows_count); return executeConst(arguments, result_type, input_rows_count);
@ -165,6 +165,10 @@ namespace
if (!cache.default_column && arguments.size() == 4) if (!cache.default_column && arguments.size() == 4)
default_non_const = castColumn(arguments[3], result_type); default_non_const = castColumn(arguments[3], result_type);
ColumnPtr in_casted = arguments[0].column;
if (arguments.size() == 3)
in_casted = castColumn(arguments[0], result_type);
auto column_result = result_type->createColumn(); auto column_result = result_type->createColumn();
if (cache.is_empty) if (cache.is_empty)
{ {
@ -174,30 +178,30 @@ namespace
} }
else if (cache.table_num_to_idx) else if (cache.table_num_to_idx)
{ {
if (!executeNum<ColumnVector<UInt8>>(in, *column_result, default_non_const) if (!executeNum<ColumnVector<UInt8>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<UInt16>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<UInt16>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<UInt32>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<UInt32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<UInt64>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<UInt64>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int8>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<Int8>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int16>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<Int16>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int32>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<Int32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int64>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<Int64>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Float32>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<Float32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Float64>>(in, *column_result, default_non_const) && !executeNum<ColumnVector<Float64>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnDecimal<Decimal32>>(in, *column_result, default_non_const) && !executeNum<ColumnDecimal<Decimal32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnDecimal<Decimal64>>(in, *column_result, default_non_const)) && !executeNum<ColumnDecimal<Decimal64>>(in, *column_result, default_non_const, *in_casted))
{ {
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
} }
} }
else if (cache.table_string_to_idx) else if (cache.table_string_to_idx)
{ {
if (!executeString(in, *column_result, default_non_const)) if (!executeString(in, *column_result, default_non_const, *in_casted))
executeContiguous(in, *column_result, default_non_const); executeContiguous(in, *column_result, default_non_const, *in_casted);
} }
else if (cache.table_anything_to_idx) else if (cache.table_anything_to_idx)
{ {
executeAnything(in, *column_result, default_non_const); executeAnything(in, *column_result, default_non_const, *in_casted);
} }
else else
throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized");
@ -218,7 +222,7 @@ namespace
return impl->execute(args, result_type, input_rows_count); return impl->execute(args, result_type, input_rows_count);
} }
void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{ {
const size_t size = in->size(); const size_t size = in->size();
const auto & table = *cache.table_anything_to_idx; const auto & table = *cache.table_anything_to_idx;
@ -236,11 +240,11 @@ namespace
else if (default_non_const) else if (default_non_const)
column_result.insertFrom(*default_non_const, i); column_result.insertFrom(*default_non_const, i);
else else
column_result.insertFrom(*in, i); column_result.insertFrom(in_casted, i);
} }
} }
void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{ {
const size_t size = in->size(); const size_t size = in->size();
const auto & table = *cache.table_string_to_idx; const auto & table = *cache.table_string_to_idx;
@ -255,12 +259,12 @@ namespace
else if (default_non_const) else if (default_non_const)
column_result.insertFrom(*default_non_const, i); column_result.insertFrom(*default_non_const, i);
else else
column_result.insertFrom(*in, i); column_result.insertFrom(in_casted, i);
} }
} }
template <typename T> template <typename T>
bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{ {
const auto * const in = checkAndGetColumn<T>(in_untyped); const auto * const in = checkAndGetColumn<T>(in_untyped);
if (!in) if (!in)
@ -297,7 +301,7 @@ namespace
else if (default_non_const) else if (default_non_const)
column_result.insertFrom(*default_non_const, i); column_result.insertFrom(*default_non_const, i);
else else
column_result.insertFrom(*in, i); column_result.insertFrom(in_casted, i);
} }
} }
return true; return true;
@ -451,7 +455,7 @@ namespace
} }
} }
bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{ {
const auto * const in = checkAndGetColumn<ColumnString>(in_untyped); const auto * const in = checkAndGetColumn<ColumnString>(in_untyped);
if (!in) if (!in)
@ -488,7 +492,7 @@ namespace
else if (default_non_const) else if (default_non_const)
column_result.insertFrom(*default_non_const, 0); column_result.insertFrom(*default_non_const, 0);
else else
column_result.insertFrom(*in, i); column_result.insertFrom(in_casted, i);
} }
} }
return true; return true;
@ -654,13 +658,13 @@ namespace
std::unique_ptr<StringToIdx> table_string_to_idx; std::unique_ptr<StringToIdx> table_string_to_idx;
std::unique_ptr<AnythingToIdx> table_anything_to_idx; std::unique_ptr<AnythingToIdx> table_anything_to_idx;
bool is_empty = false;
ColumnPtr from_column; ColumnPtr from_column;
ColumnPtr to_column; ColumnPtr to_column;
ColumnPtr default_column; ColumnPtr default_column;
std::atomic<bool> initialized{false}; bool is_empty = false;
bool initialized = false;
std::mutex mutex; std::mutex mutex;
}; };
@ -693,13 +697,12 @@ namespace
/// Can be called from different threads. It works only on the first call. /// Can be called from different threads. It works only on the first call.
void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
{ {
std::lock_guard lock(cache.mutex);
if (cache.initialized) if (cache.initialized)
return; return;
const DataTypePtr & from_type = arguments[0].type; const DataTypePtr & from_type = arguments[0].type;
std::lock_guard lock(cache.mutex);
if (from_type->onlyNull()) if (from_type->onlyNull())
{ {
cache.is_empty = true; cache.is_empty = true;

View File

@ -1,5 +1,4 @@
#include <Columns/ColumnTuple.h> #include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeTuple.h> #include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
@ -86,7 +85,7 @@ public:
auto plus_elem = plus->build({left_type, right_type}); auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType(); res_type = plus_elem->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;

View File

@ -95,7 +95,7 @@ public:
auto elem_func = func->build(ColumnsWithTypeAndName{left, right}); auto elem_func = func->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_func->getResultType(); types[i] = elem_func->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -181,7 +181,7 @@ public:
auto elem_negate = negate->build(ColumnsWithTypeAndName{cur}); auto elem_negate = negate->build(ColumnsWithTypeAndName{cur});
types[i] = elem_negate->getResultType(); types[i] = elem_negate->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -258,7 +258,7 @@ public:
auto elem_func = func->build(ColumnsWithTypeAndName{cur, p_column}); auto elem_func = func->build(ColumnsWithTypeAndName{cur, p_column});
types[i] = elem_func->getResultType(); types[i] = elem_func->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -363,7 +363,7 @@ public:
auto plus_elem = plus->build({left_type, right_type}); auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType(); res_type = plus_elem->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -467,7 +467,7 @@ public:
auto plus_elem = plus->build({left, right}); auto plus_elem = plus->build({left, right});
res_type = plus_elem->getResultType(); res_type = plus_elem->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -740,7 +740,7 @@ public:
auto plus_elem = plus->build({left_type, right_type}); auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType(); res_type = plus_elem->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -842,7 +842,7 @@ public:
auto plus_elem = plus->build({left_type, right_type}); auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType(); res_type = plus_elem->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -993,7 +993,7 @@ public:
auto max_elem = max->build({left_type, right_type}); auto max_elem = max->build({left_type, right_type});
res_type = max_elem->getResultType(); res_type = max_elem->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;
@ -1103,7 +1103,7 @@ public:
auto plus_elem = plus->build({left_type, right_type}); auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType(); res_type = plus_elem->getResultType();
} }
catch (DB::Exception & e) catch (Exception & e)
{ {
e.addMessage("While executing function {} for tuple element {}", getName(), i); e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw; throw;

View File

@ -305,12 +305,12 @@ void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::callWithRedirects(Poco::N
current_session = session; current_session = session;
call(current_session, response, method_, throw_on_all_errors, for_object_info); call(current_session, response, method_, throw_on_all_errors, for_object_info);
Poco::URI prev_uri = uri; saved_uri_redirect = uri;
while (isRedirect(response.getStatus())) while (isRedirect(response.getStatus()))
{ {
Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response); Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response);
prev_uri = uri_redirect; saved_uri_redirect = uri_redirect;
if (remote_host_filter) if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect); remote_host_filter->checkURL(uri_redirect);

View File

@ -292,6 +292,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// This is needed for TOP expression, because it can also use WITH TIES. /// This is needed for TOP expression, because it can also use WITH TIES.
bool limit_with_ties_occured = false; bool limit_with_ties_occured = false;
bool has_offset_clause = false;
bool offset_clause_has_sql_standard_row_or_rows = false; /// OFFSET offset_row_count {ROW | ROWS}
/// LIMIT length | LIMIT offset, length | LIMIT count BY expr-list | LIMIT offset, length BY expr-list /// LIMIT length | LIMIT offset, length | LIMIT count BY expr-list | LIMIT offset, length BY expr-list
if (s_limit.ignore(pos, expected)) if (s_limit.ignore(pos, expected))
{ {
@ -316,6 +319,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{ {
if (!exp_elem.parse(pos, limit_offset, expected)) if (!exp_elem.parse(pos, limit_offset, expected))
return false; return false;
has_offset_clause = true;
} }
else if (s_with_ties.ignore(pos, expected)) else if (s_with_ties.ignore(pos, expected))
{ {
@ -351,60 +356,65 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
} }
else if (s_offset.ignore(pos, expected)) else if (s_offset.ignore(pos, expected))
{ {
/// OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES} /// OFFSET without LIMIT
bool offset_with_fetch_maybe = false;
has_offset_clause = true;
if (!exp_elem.parse(pos, limit_offset, expected)) if (!exp_elem.parse(pos, limit_offset, expected))
return false; return false;
/// SQL standard OFFSET N ROW[S] ...
if (s_row.ignore(pos, expected))
offset_clause_has_sql_standard_row_or_rows = true;
if (s_rows.ignore(pos, expected))
{
if (offset_clause_has_sql_standard_row_or_rows)
throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
offset_clause_has_sql_standard_row_or_rows = true;
}
}
/// SQL standard FETCH (either following SQL standard OFFSET or following ORDER BY)
if ((!has_offset_clause || offset_clause_has_sql_standard_row_or_rows)
&& s_fetch.ignore(pos, expected))
{
/// FETCH clause must exist with "ORDER BY"
if (!order_expression_list)
throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY");
if (s_first.ignore(pos, expected))
{
if (s_next.ignore(pos, expected))
throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together");
}
else if (!s_next.ignore(pos, expected))
return false;
if (!exp_elem.parse(pos, limit_length, expected))
return false;
if (s_row.ignore(pos, expected)) if (s_row.ignore(pos, expected))
{ {
if (s_rows.ignore(pos, expected)) if (s_rows.ignore(pos, expected))
throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
offset_with_fetch_maybe = true;
} }
else if (s_rows.ignore(pos, expected)) else if (!s_rows.ignore(pos, expected))
return false;
if (s_with_ties.ignore(pos, expected))
{ {
offset_with_fetch_maybe = true; select_query->limit_with_ties = true;
} }
else if (s_only.ignore(pos, expected))
if (offset_with_fetch_maybe && s_fetch.ignore(pos, expected))
{ {
/// OFFSET FETCH clause must exists with "ORDER BY" select_query->limit_with_ties = false;
if (!order_expression_list) }
throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); else
{
if (s_first.ignore(pos, expected)) return false;
{
if (s_next.ignore(pos, expected))
throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together");
}
else if (!s_next.ignore(pos, expected))
return false;
if (!exp_elem.parse(pos, limit_length, expected))
return false;
if (s_row.ignore(pos, expected))
{
if (s_rows.ignore(pos, expected))
throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together");
}
else if (!s_rows.ignore(pos, expected))
return false;
if (s_with_ties.ignore(pos, expected))
{
select_query->limit_with_ties = true;
}
else if (s_only.ignore(pos, expected))
{
select_query->limit_with_ties = false;
}
else
{
return false;
}
} }
} }

View File

@ -369,14 +369,25 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
break; break;
case avro::AVRO_UNION: case avro::AVRO_UNION:
{ {
if (root_node->leaves() == 2 if (root_node->leaves() == 1)
{
auto nested_deserialize = createDeserializeFn(root_node->leafAt(0), target_type);
return [nested_deserialize](IColumn & column, avro::Decoder & decoder)
{
decoder.decodeUnionIndex();
nested_deserialize(column, decoder);
return true;
};
}
/// FIXME: Support UNION with more than two data types.
else if (
root_node->leaves() == 2
&& (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL)) && (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL))
{ {
int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0; int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0;
if (target.isNullable()) if (target.isNullable())
{ {
auto nested_deserialize = this->createDeserializeFn( auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type));
root_node->leafAt(non_null_union_index), removeNullable(target_type));
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{ {
ColumnNullable & col = assert_cast<ColumnNullable &>(column); ColumnNullable & col = assert_cast<ColumnNullable &>(column);
@ -395,7 +406,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro
} }
else if (null_as_default) else if (null_as_default)
{ {
auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), target_type);
return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder)
{ {
int union_index = static_cast<int>(decoder.decodeUnionIndex()); int union_index = static_cast<int>(decoder.decodeUnionIndex());
@ -1192,12 +1203,19 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node)
case avro::Type::AVRO_NULL: case avro::Type::AVRO_NULL:
return std::make_shared<DataTypeNothing>(); return std::make_shared<DataTypeNothing>();
case avro::Type::AVRO_UNION: case avro::Type::AVRO_UNION:
if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) if (node->leaves() == 1)
{
return avroNodeToDataType(node->leafAt(0));
}
else if (
node->leaves() == 2
&& (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL))
{ {
int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0;
auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index)); auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index));
return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type; return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type;
} }
/// FIXME: Support UNION with more than two data types.
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting."); throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting.");
case avro::Type::AVRO_SYMBOLIC: case avro::Type::AVRO_SYMBOLIC:
return avroNodeToDataType(avro::resolveSymbol(node)); return avroNodeToDataType(avro::resolveSymbol(node));

View File

@ -389,7 +389,7 @@ std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource:
for (; option != end; ++option) for (; option != end; ++option)
{ {
bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end);
auto request_uri = Poco::URI(*option); auto request_uri = Poco::URI(*option, context->getSettingsRef().disable_url_encoding);
for (const auto & [param, value] : params) for (const auto & [param, value] : params)
request_uri.addQueryParameter(param, value); request_uri.addQueryParameter(param, value);

View File

@ -143,6 +143,7 @@ def test_string_functions(start_cluster):
"position", "position",
"substring", "substring",
"CAST", "CAST",
"getTypeSerializationStreams",
# NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument
# 22.8 Backward Incompatible Change: Extended range of Date32 # 22.8 Backward Incompatible Change: Extended range of Date32
"toDate32OrZero", "toDate32OrZero",

View File

@ -215,7 +215,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical):
if attempt == 59: if attempt == 59:
assert parts_count == "(1)" assert parts_count == "(1)"
time.sleep(1) time.sleep(10)
assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)"
assert ( assert (

View File

@ -0,0 +1,36 @@
┌─name───────────┬─department─┬─salary─┐
│ Frank │ it │ 120 │
│ Henry or Irene │ it │ 104 │
│ Henry or Irene │ it │ 104 │
│ Alice │ sales │ 100 │
│ Dave or Cindy │ sales │ 96 │
└────────────────┴────────────┴────────┘
┌─name───────────┬─department─┬─salary─┐
│ Frank │ it │ 120 │
│ Henry or Irene │ it │ 104 │
│ Henry or Irene │ it │ 104 │
│ Alice │ sales │ 100 │
│ Dave or Cindy │ sales │ 96 │
└────────────────┴────────────┴────────┘
┌─name───────────┬─department─┬─salary─┐
│ Frank │ it │ 120 │
│ Henry or Irene │ it │ 104 │
│ Henry or Irene │ it │ 104 │
│ Alice │ sales │ 100 │
│ Dave or Cindy │ sales │ 96 │
│ Dave or Cindy │ sales │ 96 │
└────────────────┴────────────┴────────┘
┌─name──────────┬─department─┬─salary─┐
│ Alice │ sales │ 100 │
│ Dave or Cindy │ sales │ 96 │
│ Dave or Cindy │ sales │ 96 │
│ Grace │ it │ 90 │
│ Emma │ it │ 84 │
└───────────────┴────────────┴────────┘
┌─name──────────┬─department─┬─salary─┐
│ Alice │ sales │ 100 │
│ Dave or Cindy │ sales │ 96 │
│ Dave or Cindy │ sales │ 96 │
│ Grace │ it │ 90 │
│ Emma │ it │ 84 │
└───────────────┴────────────┴────────┘

View File

@ -0,0 +1,34 @@
-- https://antonz.org/sql-fetch/
-- Exercises the SQL-standard FETCH {FIRST | NEXT} n ROWS {ONLY | WITH TIES} clause,
-- with and without a preceding OFFSET n ROWS, next to a plain LIMIT query over the same data.
CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64);
INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', 84);
-- Determinism
SET max_threads = 1, parallelize_output_from_storages = 0;
-- In every query below transform() gives employees with equal salaries (Henry/Irene at 104,
-- Dave/Cindy at 96) the same displayed name -- presumably so that the printed result does not
-- depend on which of the tied rows is returned.
-- Query 1: ClickHouse-style LIMIT.
select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
order by salary desc
limit 5
format PrettyCompactNoEscapes;
-- Query 2: FETCH FIRST ... ROWS ONLY directly after ORDER BY (no OFFSET clause).
select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
order by salary desc
fetch first 5 rows only
format PrettyCompactNoEscapes;
-- Query 3: FETCH FIRST ... ROWS WITH TIES directly after ORDER BY (no OFFSET clause).
select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
order by salary desc
fetch first 5 rows with ties
format PrettyCompactNoEscapes;
-- Query 4: OFFSET ... ROWS followed by FETCH NEXT ... ROWS ONLY.
select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
order by salary desc
offset 3 rows
fetch next 5 rows only
format PrettyCompactNoEscapes;
-- Query 5: same as query 4, but spelled with FIRST instead of NEXT.
select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary)
order by salary desc
offset 3 rows
fetch first 5 rows only
format PrettyCompactNoEscapes;

View File

@ -0,0 +1,5 @@
name String
favorite_number Int32
favorite_color String
Alyssa 256 yellow
Ben 7 red

View File

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
# Reads data_avro/union_one_type.avro through the file() table function of clickhouse-local:
# first prints the table structure (desc), then all rows.
# NOTE(review): judging by the file name, the Avro schema presumably contains a UNION with a
# single branch -- confirm against the data file.
# Stop at the first failing command.
set -e
# Absolute path of the directory containing this script.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# CLICKHOUSE_LOCAL is not set in this script; it is expected to come from shell_config.sh.
DATA_DIR=$CUR_DIR/data_avro
$CLICKHOUSE_LOCAL -q "desc file('$DATA_DIR/union_one_type.avro')"
$CLICKHOUSE_LOCAL -q "select * from file('$DATA_DIR/union_one_type.avro')"

View File

@ -0,0 +1,3 @@
test
\N

View File

@ -0,0 +1,3 @@
-- transform() called with three arguments (no default) on a Nullable(FixedString(4)) input,
-- where the replacement array contains both an empty string and NULL.
-- Case 1: the input 'test' matches none of the source values.
SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name);
-- Case 2: the input matches the first source value, whose replacement is ''.
SELECT transform(name, ['test', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name);
-- Case 3: the input matches the second source value, whose replacement is NULL.
SELECT transform(name, ['a', 'test'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name);