ClickHouse/src/Client/ClientBaseHelpers.cpp
2024-04-29 19:53:50 +02:00

204 lines
7.3 KiB
C++

#include "ClientBaseHelpers.h"
#include <Common/DateLUT.h>
#include <Common/LocalDate.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Common/UTF8Helpers.h>
#include <iostream>
namespace DB
{
/// Should we celebrate a bit?
bool isNewYearMode()
{
time_t current_time = time(nullptr);
/// It's bad to be intrusive.
if (current_time % 3 != 0)
return false;
LocalDate now(current_time);
return (now.month() == 12 && now.day() >= 20) || (now.month() == 1 && now.day() <= 5);
}
bool isChineseNewYearMode(const String & local_tz)
{
/// Days of Dec. 20 in Chinese calendar starting from year 2019 to year 2105
static constexpr UInt16 chineseNewYearIndicators[]
= {18275, 18659, 19014, 19368, 19752, 20107, 20491, 20845, 21199, 21583, 21937, 22292, 22676, 23030, 23414, 23768, 24122, 24506,
24860, 25215, 25599, 25954, 26308, 26692, 27046, 27430, 27784, 28138, 28522, 28877, 29232, 29616, 29970, 30354, 30708, 31062,
31446, 31800, 32155, 32539, 32894, 33248, 33632, 33986, 34369, 34724, 35078, 35462, 35817, 36171, 36555, 36909, 37293, 37647,
38002, 38386, 38740, 39095, 39479, 39833, 40187, 40571, 40925, 41309, 41664, 42018, 42402, 42757, 43111, 43495, 43849, 44233,
44587, 44942, 45326, 45680, 46035, 46418, 46772, 47126, 47510, 47865, 48249, 48604, 48958, 49342};
/// All time zone names are acquired from https://www.iana.org/time-zones
static constexpr const char * chineseNewYearTimeZoneIndicators[] = {
/// Time zones celebrating Chinese new year.
"Asia/Shanghai",
"Asia/Chongqing",
"Asia/Harbin",
"Asia/Urumqi",
"Asia/Hong_Kong",
"Asia/Chungking",
"Asia/Macao",
"Asia/Macau",
"Asia/Taipei",
"Asia/Singapore",
/// Time zones celebrating Chinese new year but with different festival names. Let's not print the message for now.
// "Asia/Brunei",
// "Asia/Ho_Chi_Minh",
// "Asia/Hovd",
// "Asia/Jakarta",
// "Asia/Jayapura",
// "Asia/Kashgar",
// "Asia/Kuala_Lumpur",
// "Asia/Kuching",
// "Asia/Makassar",
// "Asia/Pontianak",
// "Asia/Pyongyang",
// "Asia/Saigon",
// "Asia/Seoul",
// "Asia/Ujung_Pandang",
// "Asia/Ulaanbaatar",
// "Asia/Ulan_Bator",
};
static constexpr size_t M = sizeof(chineseNewYearTimeZoneIndicators) / sizeof(chineseNewYearTimeZoneIndicators[0]);
time_t current_time = time(nullptr);
if (chineseNewYearTimeZoneIndicators + M
== std::find_if(chineseNewYearTimeZoneIndicators, chineseNewYearTimeZoneIndicators + M, [&local_tz](const char * tz)
{
return tz == local_tz;
}))
return false;
/// It's bad to be intrusive.
if (current_time % 3 != 0)
return false;
auto days = DateLUT::instance().toDayNum(current_time).toUnderType();
for (auto d : chineseNewYearIndicators)
{
/// Let's celebrate until Lantern Festival
if (d <= days && d + 25 >= days)
return true;
else if (d > days)
return false;
}
return false;
}
#if USE_REPLXX
void highlight(const String & query, std::vector<replxx::Replxx::Color> & colors)
{
using namespace replxx;
/// The `colors` array maps to a Unicode code point position in a string into a color.
/// A color is set for every position individually (not for a range).
/// Empty input.
if (colors.empty())
return;
/// The colors should be legible (and look gorgeous) in both dark and light themes.
/// When modifying this, check it in both themes.
static const std::unordered_map<Highlight, Replxx::Color> type_to_color =
{
{Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)},
{Highlight::identifier, Replxx::Color::CYAN},
{Highlight::function, Replxx::Color::BROWN},
{Highlight::alias, replxx::color::rgb666(0, 4, 4)},
{Highlight::substitution, Replxx::Color::MAGENTA},
{Highlight::number, replxx::color::rgb666(0, 4, 0)},
{Highlight::string, Replxx::Color::GREEN},
};
/// We set reasonably small limits for size/depth, because we don't want the CLI to be slow.
/// While syntax highlighting is unneeded for long queries, which the user couldn't read anyway.
const char * begin = query.data();
const char * end = begin + query.size();
Tokens tokens(begin, end, 10000, true);
IParser::Pos token_iterator(tokens, static_cast<uint32_t>(1000), static_cast<uint32_t>(10000));
Expected expected;
expected.enable_highlighting = true;
/// We don't do highlighting for foreign dialects, such as PRQL and Kusto.
/// Only normal ClickHouse SQL queries are highlighted.
/// Currently we highlight only the first query in the multi-query mode.
ParserQuery parser(end);
ASTPtr ast;
bool parse_res = false;
try
{
parse_res = parser.parse(token_iterator, ast, expected);
}
catch (...)
{
/// Skip highlighting in the case of exceptions during parsing.
/// It is ok to ignore unknown exceptions here.
return;
}
size_t pos = 0;
const char * prev = begin;
for (const auto & range : expected.highlights)
{
auto it = type_to_color.find(range.highlight);
if (it != type_to_color.end())
{
/// We have to map from byte positions to Unicode positions.
pos += UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(prev), range.begin - prev);
size_t utf8_len = UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(range.begin), range.end - range.begin);
for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index)
colors[pos + code_point_index] = it->second;
pos += utf8_len;
prev = range.end;
}
}
Token last_token = token_iterator.max();
/// Raw data in INSERT queries, which is not necessarily tokenized.
const char * insert_data = ast ? getInsertData(ast) : nullptr;
/// Highlight the last error in red. If the parser failed or the lexer found an invalid token,
/// or if it didn't parse all the data (except, the data for INSERT query, which is legitimately unparsed)
if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon))
&& !(insert_data && expected.max_parsed_pos >= insert_data)
&& expected.max_parsed_pos >= prev)
{
pos += UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(prev), expected.max_parsed_pos - prev);
if (pos >= colors.size())
pos = colors.size() - 1;
colors[pos] = Replxx::Color::BRIGHTRED;
}
/// This is a callback for the client/local app to better find query end. Note: this is a kludge, remove it.
if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter
|| query.ends_with(';') || query.ends_with("\\G")) /// This is for raw data in INSERT queries, which is not necessarily tokenized.
{
ReplxxLineReader::setLastIsDelimiter(true);
}
else if (last_token.type != TokenType::Whitespace)
{
ReplxxLineReader::setLastIsDelimiter(false);
}
}
#endif
}