Merge branch 'master' into cache-fix

mergify[bot] authored 2022-04-13 13:32:56 +00:00, committed by GitHub
commit 2c5d6d132e
100 changed files with 2652 additions and 598 deletions


@ -153,13 +153,19 @@ jobs:
EOF
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Fast Test
run: |
sudo rm -fr "$GITHUB_WORKSPACE"
mkdir "$GITHUB_WORKSPACE"
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
- name: Check out repository code
uses: actions/checkout@v2
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.TEMP_PATH }}
- name: Fast Test
run: |
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 fast_test_check.py
- name: Cleanup
@ -1052,7 +1058,6 @@ jobs:
cat >> "$GITHUB_ENV" << 'EOF'
CHECK_NAME=ClickHouse build check (actions)
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
EOF
- name: Download json reports


@ -46,7 +46,7 @@ macro(clickhouse_make_empty_debug_info_for_nfpm)
add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
COMMENT "Addiding empty debug info for NFPM" VERBATIM
COMMENT "Adding empty debug info for NFPM" VERBATIM
)
install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)


@ -115,6 +115,7 @@ function start_server
function clone_root
{
git config --global --add safe.directory "$FASTTEST_SOURCE"
git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt"
(


@ -329,8 +329,8 @@ then
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "Renaming unexpected part" \
/var/log/clickhouse-server/clickhouse-server.backward.*.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tOK' >> /test_output/test_results.tsv \
/var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_error_messages.txt if it's empty
@ -346,7 +346,7 @@ then
# OOM
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tOK' >> /test_output/test_results.tsv \
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors
@ -366,7 +366,7 @@ then
# It also checks for crash without stacktrace (printed by watchdog)
echo "Check for Fatal message in server log:"
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tOK' >> /test_output/test_results.tsv \
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty


@ -131,7 +131,7 @@ def prepare_for_hung_check(drop_databases):
Popen(command, shell=True)
break
except Exception as ex:
print("Failed to SHOW or DROP databasese, will retry", ex)
logging.error("Failed to SHOW or DROP databasese, will retry %s", str(ex))
time.sleep(i)
else:
raise Exception("Cannot drop databases after stress tests. Probably server consumed too much memory and cannot execute simple queries")
@ -198,7 +198,11 @@ if __name__ == "__main__":
logging.info("Logs compressed")
if args.hung_check:
have_long_running_queries = prepare_for_hung_check(args.drop_databases)
try:
have_long_running_queries = prepare_for_hung_check(args.drop_databases)
except Exception as ex:
have_long_running_queries = True
logging.error("Failed to prepare for hung check %s", str(ex))
logging.info("Checking if some queries hung")
cmd = ' '.join([args.test_cmd,
# Do not track memory allocations up to 1Gi,
@ -215,6 +219,8 @@ if __name__ == "__main__":
"--client-option", "max_untracked_memory=1Gi",
"--client-option", "max_memory_usage_for_user=0",
"--client-option", "memory_profiler_step=1Gi",
# Use system database to avoid CREATE/DROP DATABASE queries
"--database=system",
"--hung-check",
"00001_select_1"
])


@ -47,7 +47,7 @@ Optional parameters:
- `kafka_row_delimiter` — Delimiter character, which ends the message.
- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed.
- `kafka_max_block_size` — The maximum batch size (in messages) for poll (default: `max_block_size`).
- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`).
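For orientation, a minimal sketch of how these settings are typically combined in a `Kafka` table definition (broker, topic, group, and column names here are placeholders, not taken from this commit):

``` sql
CREATE TABLE queue
(
    timestamp UInt64,
    level String,
    message String
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'localhost:9092',
         kafka_topic_list = 'events',
         kafka_group_name = 'group1',
         kafka_format = 'JSONEachRow',
         -- keep this at or below the number of topic partitions and physical cores, as noted above
         kafka_num_consumers = 4;
```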


@ -22,7 +22,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [INTERPOLATE [(expr_list)]]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...]


@ -280,6 +280,7 @@ To fill multiple columns, add `WITH FILL` modifier with optional parameters afte
``` sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
[INTERPOLATE [(col [AS expr], ... colN [AS exprN])]]
```
`WITH FILL` can be applied to fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied to `String` fields, missing values are filled with empty strings.
@ -287,6 +288,7 @@ When `FROM const_expr` not defined sequence of filling use minimal `expr` field
When `TO const_expr` is not defined, the fill sequence uses the maximum `expr` field value from `ORDER BY`.
When `STEP const_numeric_expr` is defined, `const_numeric_expr` is interpreted `as is` for numeric types, as `days` for the Date type, and as `seconds` for the DateTime type. It also supports the [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals.
When `STEP const_numeric_expr` is omitted, the fill sequence uses `1.0` for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on the previous fields' values by applying `expr`. If `expr` is not present, the previous value is repeated. An omitted column list results in including all allowed columns.
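To illustrate the `STEP` behaviour described above, a small sketch (not part of this commit) that fills monthly gaps using an `INTERVAL` step:

``` sql
SELECT addMonths(toDate('2022-01-01'), number * 2) AS d, 'original' AS source
FROM numbers(3)
ORDER BY d WITH FILL STEP INTERVAL 1 MONTH;
-- rows for 2022-02-01 and 2022-04-01 are generated with an empty `source`
```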
Example of a query without `WITH FILL`:
@ -483,4 +485,62 @@ Result:
└────────────┴────────────┴──────────┘
```
Example of a query without `INTERPOLATE`:
``` sql
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5;
```
Result:
``` text
┌───n─┬─source───┬─inter─┐
│ 0 │ │ 0 │
│ 0.5 │ │ 0 │
│ 1 │ original │ 1 │
│ 1.5 │ │ 0 │
│ 2 │ │ 0 │
│ 2.5 │ │ 0 │
│ 3 │ │ 0 │
│ 3.5 │ │ 0 │
│ 4 │ original │ 4 │
│ 4.5 │ │ 0 │
│ 5 │ │ 0 │
│ 5.5 │ │ 0 │
│ 7 │ original │ 7 │
└─────┴──────────┴───────┘
```
Same query after applying `INTERPOLATE`:
``` sql
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS inter + 1);
```
Result:
``` text
┌───n─┬─source───┬─inter─┐
│ 0 │ │ 0 │
│ 0.5 │ │ 0 │
│ 1 │ original │ 1 │
│ 1.5 │ │ 2 │
│ 2 │ │ 3 │
│ 2.5 │ │ 4 │
│ 3 │ │ 5 │
│ 3.5 │ │ 6 │
│ 4 │ original │ 4 │
│ 4.5 │ │ 5 │
│ 5 │ │ 6 │
│ 5.5 │ │ 7 │
│ 7 │ original │ 7 │
└─────┴──────────┴───────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/) <!--hide-->


@ -20,7 +20,7 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
[WHERE expr]
[GROUP BY expr_list] [WITH ROLLUP|WITH CUBE] [WITH TOTALS]
[HAVING expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr] [INTERPOLATE [(expr_list)]]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...]


@ -280,6 +280,7 @@ SELECT * FROM collate_test ORDER BY s ASC COLLATE 'en';
```sql
ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_expr], ... exprN [WITH FILL] [FROM expr] [TO expr] [STEP numeric_expr]
[INTERPOLATE [(col [AS expr], ... colN [AS exprN])]]
```
`WITH FILL` can be applied to fields with numeric (all kinds of float, int, decimal) or temporal (all kinds of Date, DateTime) types. When applied to `String` fields, missing values are filled with an empty string.
@ -289,6 +290,8 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_
When `STEP const_numeric_expr` is not specified, `1.0` is used for numeric types, `1 day` for the Date type, and `1 second` for the DateTime type.
`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled with values computed by applying `expr` to the previous value. If `expr` is omitted, the column is filled with the previous value. If the column list is not specified, all allowed columns are included.
Example of a query without `WITH FILL`:
```sql
SELECT n, source FROM (
@ -395,3 +398,58 @@ ORDER BY
│ 1970-03-12 │ 1970-01-08 │ original │
└────────────┴────────────┴──────────┘
```
Example of a query without `INTERPOLATE`:
``` sql
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5;
```
Result:
``` text
┌───n─┬─source───┬─inter─┐
│ 0 │ │ 0 │
│ 0.5 │ │ 0 │
│ 1 │ original │ 1 │
│ 1.5 │ │ 0 │
│ 2 │ │ 0 │
│ 2.5 │ │ 0 │
│ 3 │ │ 0 │
│ 3.5 │ │ 0 │
│ 4 │ original │ 4 │
│ 4.5 │ │ 0 │
│ 5 │ │ 0 │
│ 5.5 │ │ 0 │
│ 7 │ original │ 7 │
└─────┴──────────┴───────┘
```
The same query with `INTERPOLATE`:
``` sql
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter
FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS inter + 1);
```
Result:
``` text
┌───n─┬─source───┬─inter─┐
│ 0 │ │ 0 │
│ 0.5 │ │ 0 │
│ 1 │ original │ 1 │
│ 1.5 │ │ 2 │
│ 2 │ │ 3 │
│ 2.5 │ │ 4 │
│ 3 │ │ 5 │
│ 3.5 │ │ 6 │
│ 4 │ original │ 4 │
│ 4.5 │ │ 5 │
│ 5 │ │ 6 │
│ 5.5 │ │ 7 │
│ 7 │ original │ 7 │
└─────┴──────────┴───────┘
```


@ -137,14 +137,14 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
auto & dst_column_host_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["host_name"]]);
auto & dst_array_current_time = typeid_cast<ColumnUInt32 &>(*mutable_columns[name_pos["current_time"]]).getData();
auto & dst_array_thread_id = typeid_cast<ColumnUInt64 &>(*mutable_columns[name_pos["thread_id"]]).getData();
// auto & dst_array_thread_id = typeid_cast<ColumnUInt64 &>(*mutable_columns[name_pos["thread_id"]]).getData();
auto & dst_array_type = typeid_cast<ColumnInt8 &>(*mutable_columns[name_pos["type"]]).getData();
auto & dst_column_name = typeid_cast<ColumnString &>(*mutable_columns[name_pos["name"]]);
auto & dst_array_value = typeid_cast<ColumnInt64 &>(*mutable_columns[name_pos["value"]]).getData();
const auto & src_column_host_name = typeid_cast<const ColumnString &>(*src.getByName("host_name").column);
const auto & src_array_current_time = typeid_cast<const ColumnUInt32 &>(*src.getByName("current_time").column).getData();
// const auto & src_array_thread_id = typeid_cast<const ColumnUInt64 &>(*src.getByName("thread_id").column).getData();
const auto & src_array_thread_id = typeid_cast<const ColumnUInt64 &>(*src.getByName("thread_id").column).getData();
const auto & src_column_name = typeid_cast<const ColumnString &>(*src.getByName("name").column);
const auto & src_array_value = typeid_cast<const ColumnInt64 &>(*src.getByName("value").column).getData();
@ -169,6 +169,16 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
rows_by_name[id] = src_row;
}
/// Filter out snapshots
std::set<size_t> thread_id_filter_mask;
for (size_t i = 0; i < src_array_thread_id.size(); ++i)
{
if (src_array_thread_id[i] != 0)
{
thread_id_filter_mask.emplace(i);
}
}
/// Merge src into dst.
for (size_t dst_row = 0; dst_row < dst_rows; ++dst_row)
{
@ -180,6 +190,11 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
if (auto it = rows_by_name.find(id); it != rows_by_name.end())
{
size_t src_row = it->second;
if (thread_id_filter_mask.contains(src_row))
{
continue;
}
dst_array_current_time[dst_row] = src_array_current_time[src_row];
switch (dst_array_type[dst_row])
@ -199,24 +214,18 @@ static void incrementProfileEventsBlock(Block & dst, const Block & src)
/// Copy rows from src that dst does not contains.
for (const auto & [id, pos] : rows_by_name)
{
if (thread_id_filter_mask.contains(pos))
{
continue;
}
for (size_t col = 0; col < src.columns(); ++col)
{
mutable_columns[col]->insert((*src.getByPosition(col).column)[pos]);
}
}
/// Filter out snapshots
std::set<size_t> thread_id_filter_mask;
for (size_t i = 0; i < dst_array_thread_id.size(); ++i)
{
if (dst_array_thread_id[i] != 0)
{
thread_id_filter_mask.emplace(i);
}
}
dst.setColumns(std::move(mutable_columns));
dst.erase(thread_id_filter_mask);
}
@ -225,17 +234,16 @@ std::atomic_flag exit_on_signal;
class QueryInterruptHandler : private boost::noncopyable
{
public:
QueryInterruptHandler() { exit_on_signal.clear(); }
~QueryInterruptHandler() { exit_on_signal.test_and_set(); }
static void start() { exit_on_signal.clear(); }
/// Return true if the query was stopped.
static bool stop() { return exit_on_signal.test_and_set(); }
static bool cancelled() { return exit_on_signal.test(); }
};
/// This signal handler is set only for SIGINT.
void interruptSignalHandler(int signum)
{
if (exit_on_signal.test_and_set())
if (QueryInterruptHandler::stop())
safeExit(128 + signum);
}
@ -254,7 +262,7 @@ ClientBase::ClientBase() = default;
void ClientBase::setupSignalHandler()
{
exit_on_signal.test_and_set();
QueryInterruptHandler::stop();
struct sigaction new_act;
memset(&new_act, 0, sizeof(new_act));
@ -395,8 +403,16 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (need_render_progress && (stdout_is_a_tty || is_interactive) && !select_into_file)
progress_indication.clearProgressOutput();
output_format->write(materializeBlock(block));
written_first_block = true;
try
{
output_format->write(materializeBlock(block));
written_first_block = true;
}
catch (const Exception &)
{
/// Catch client errors like NO_ROW_DELIMITER
throw LocalFormatError(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
}
/// Received data block is immediately displayed to the user.
output_format->flush();
@ -685,6 +701,9 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
{
try
{
QueryInterruptHandler::start();
SCOPE_EXIT({ QueryInterruptHandler::stop(); });
connection->sendQuery(
connection_parameters.timeouts,
query,
@ -724,8 +743,6 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
/// Also checks if query execution should be cancelled.
void ClientBase::receiveResult(ASTPtr parsed_query)
{
QueryInterruptHandler query_interrupt_handler;
// TODO: get the poll_interval from commandline.
const auto receive_timeout = connection_parameters.timeouts.receive_timeout;
constexpr size_t default_poll_interval = 1000000; /// in microseconds
@ -760,7 +777,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query)
};
/// handler received sigint
if (query_interrupt_handler.cancelled())
if (QueryInterruptHandler::cancelled())
{
cancel_query();
}
@ -1413,6 +1430,8 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
progress_indication.clearProgressOutput();
logs_out_stream->writeProfileEvents(profile_events.last_block);
logs_out_stream->flush();
profile_events.last_block = {};
}
if (is_interactive)
@ -1838,7 +1857,7 @@ void ClientBase::runInteractive()
}
LineReader::Patterns query_extenders = {"\\"};
LineReader::Patterns query_delimiters = {";", "\\G"};
LineReader::Patterns query_delimiters = {";", "\\G", "\\G;"};
#if USE_REPLXX
replxx::Replxx::highlighter_callback_t highlight_callback{};
@ -1860,9 +1879,13 @@ void ClientBase::runInteractive()
break;
has_vertical_output_suffix = false;
if (input.ends_with("\\G"))
if (input.ends_with("\\G") || input.ends_with("\\G;"))
{
input.resize(input.size() - 2);
if (input.ends_with("\\G"))
input.resize(input.size() - 2);
else if (input.ends_with("\\G;"))
input.resize(input.size() - 3);
has_vertical_output_suffix = true;
}
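In effect, both `\G` and the new `\G;` form now act as query delimiters that switch clickhouse-client to vertical output; a usage sketch:

``` sql
SELECT version()\G
SELECT version()\G;
-- both forms print the result in vertical (one value per line) format
```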


@ -201,9 +201,6 @@ void LocalConnection::finishQuery()
{
next_packet_type = Protocol::Server::EndOfStream;
if (!state)
return;
if (state->executor)
{
state->executor.reset();
@ -219,6 +216,7 @@ void LocalConnection::finishQuery()
state->io.onFinish();
state.reset();
last_sent_snapshots.clear();
}
bool LocalConnection::poll(size_t)
@ -326,6 +324,21 @@ bool LocalConnection::poll(size_t)
}
}
if (state->is_finished && !state->sent_profile_events)
{
state->sent_profile_events = true;
if (send_profile_events && state->executor)
{
Block block;
state->after_send_profile_events.restart();
next_packet_type = Protocol::Server::ProfileEvents;
getProfileEvents(block);
state->block.emplace(std::move(block));
return true;
}
}
if (state->is_finished)
{
finishQuery();


@ -47,6 +47,7 @@ struct LocalQueryState
bool sent_extremes = false;
bool sent_progress = false;
bool sent_profile_info = false;
bool sent_profile_events = false;
/// To output progress, the difference after the previous sending of progress.
Progress progress;


@ -57,6 +57,8 @@ struct ZooKeeperRequest : virtual Request
bool restored_from_zookeeper_log = false;
UInt64 request_created_time_ns = 0;
UInt64 thread_id = 0;
String query_id;
ZooKeeperRequest() = default;
ZooKeeperRequest(const ZooKeeperRequest &) = default;


@ -8,6 +8,7 @@
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <base/logger_useful.h>
#include <base/getThreadId.h>
#include <Common/config.h>
@ -1016,6 +1017,11 @@ void ZooKeeper::pushRequest(RequestInfo && info)
try
{
info.time = clock::now();
if (zk_log)
{
info.request->thread_id = getThreadId();
info.request->query_id = String(CurrentThread::getQueryId());
}
if (!info.request->xid)
{
@ -1269,6 +1275,11 @@ void ZooKeeper::logOperationIfNeeded(const ZooKeeperRequestPtr & request, const
elem.event_time = event_time;
elem.address = socket_address;
elem.session_id = session_id;
if (request)
{
elem.thread_id = request->thread_id;
elem.query_id = request->query_id;
}
maybe_zk_log->add(elem);
}
}
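With `thread_id` and `query_id` now propagated into the log element, entries in `system.zookeeper_log` can be attributed to the query that issued the ZooKeeper request. A hedged usage sketch, assuming the table exposes these fields under the same names:

``` sql
SELECT query_id, thread_id, session_id
FROM system.zookeeper_log
WHERE query_id != ''
LIMIT 10;
```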


@ -1,4 +1,4 @@
#include <Common/renameat2.h>
#include <Common/atomicRename.h>
#include <Common/Exception.h>
#include <Common/VersionNumber.h>
#include <Poco/Environment.h>
@ -55,7 +55,7 @@ namespace ErrorCodes
namespace DB
{
static bool supportsRenameat2Impl()
static bool supportsAtomicRenameImpl()
{
VersionNumber renameat2_minimal_version(3, 15, 0);
VersionNumber linux_version(Poco::Environment::osVersion());
@ -64,7 +64,7 @@ static bool supportsRenameat2Impl()
static bool renameat2(const std::string & old_path, const std::string & new_path, int flags)
{
if (!supportsRenameat2())
if (!supportsAtomicRename())
return false;
if (old_path.empty() || new_path.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot rename {} to {}: path is empty", old_path, new_path);
@ -93,9 +93,69 @@ static bool renameat2(const std::string & old_path, const std::string & new_path
throwFromErrnoWithPath(fmt::format("Cannot rename {} to {}", old_path, new_path), new_path, ErrorCodes::SYSTEM_ERROR);
}
bool supportsRenameat2()
bool supportsAtomicRename()
{
static bool supports = supportsRenameat2Impl();
static bool supports = supportsAtomicRenameImpl();
return supports;
}
}
#elif defined(__APPLE__)
// Includes
#include <dlfcn.h> // For dlsym
#include <stdio.h> // For renamex_np
#include <string.h> // For stderror
#ifndef RENAME_SWAP
#define RENAME_SWAP 0x00000002
#endif
#ifndef RENAME_EXCL
#define RENAME_EXCL 0x00000004
#endif
#define RENAME_NOREPLACE RENAME_EXCL
#define RENAME_EXCHANGE RENAME_SWAP
namespace DB
{
static bool renameat2(const std::string & old_path, const std::string & new_path, int flags)
{
using function_type = int (*)(const char * from, const char * to, unsigned int flags);
static function_type fun = reinterpret_cast<function_type>(dlsym(RTLD_DEFAULT, "renamex_np"));
if (fun == nullptr)
return false;
if (old_path.empty() || new_path.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot rename {} to {}: path is empty", old_path, new_path);
if (0 == (*fun)(old_path.c_str(), new_path.c_str(), flags))
return true;
int errnum = errno;
if (errnum == ENOTSUP || errnum == EINVAL)
return false;
if (errnum == EEXIST)
throwFromErrno(fmt::format("Cannot rename {} to {} because the second path already exists", old_path, new_path), ErrorCodes::ATOMIC_RENAME_FAIL);
if (errnum == ENOENT)
throwFromErrno(fmt::format("Paths cannot be exchanged because {} or {} does not exist", old_path, new_path), ErrorCodes::ATOMIC_RENAME_FAIL);
throwFromErrnoWithPath(
fmt::format("Cannot rename {} to {}: {}", old_path, new_path, strerror(errnum)), new_path, ErrorCodes::SYSTEM_ERROR);
}
static bool supportsAtomicRenameImpl()
{
auto fun = dlsym(RTLD_DEFAULT, "renamex_np");
return fun != nullptr;
}
bool supportsAtomicRename()
{
static bool supports = supportsAtomicRenameImpl();
return supports;
}
@ -114,7 +174,7 @@ static bool renameat2(const std::string &, const std::string &, int)
return false;
}
bool supportsRenameat2()
bool supportsAtomicRename()
{
return false;
}


@ -6,7 +6,7 @@ namespace DB
{
/// Returns true, if the following functions supported by the system
bool supportsRenameat2();
bool supportsAtomicRename();
/// Atomically rename old_path to new_path. If new_path exists, do not overwrite it and throw exception
void renameNoReplace(const std::string & old_path, const std::string & new_path);


@ -407,7 +407,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction)
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension));
changelog.compact(6);
std::this_thread::sleep_for(std::chrono::microseconds(200));
std::this_thread::sleep_for(std::chrono::microseconds(1000));
EXPECT_FALSE(fs::exists("./logs/changelog_1_5.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension));
@ -1469,7 +1469,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges)
}
changelog_2.compact(105);
std::this_thread::sleep_for(std::chrono::microseconds(200));
std::this_thread::sleep_for(std::chrono::microseconds(1000));
EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin" + params.extension));
EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin" + params.extension));
@ -1489,7 +1489,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges)
}
changelog_3.compact(125);
std::this_thread::sleep_for(std::chrono::microseconds(200));
std::this_thread::sleep_for(std::chrono::microseconds(1000));
EXPECT_FALSE(fs::exists("./logs/changelog_101_110.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_111_117.bin" + params.extension));
EXPECT_FALSE(fs::exists("./logs/changelog_118_124.bin" + params.extension));


@ -0,0 +1,32 @@
#include <Core/Block.h>
#include <IO/Operators.h>
#include <Common/JSONBuilder.h>
#include <Core/InterpolateDescription.h>
#include <Interpreters/convertFieldToType.h>
namespace DB
{
InterpolateDescription::InterpolateDescription(ActionsDAGPtr actions_, const Aliases & aliases)
: actions(actions_)
{
for (const auto & name_type : actions->getRequiredColumns())
{
if (const auto & p = aliases.find(name_type.name); p != aliases.end())
required_columns_map[p->second->getColumnName()] = name_type;
else
required_columns_map[name_type.name] = name_type;
}
for (const ColumnWithTypeAndName & column : actions->getResultColumns())
{
std::string name = column.name;
if (const auto & p = aliases.find(name); p != aliases.end())
name = p->second->getColumnName();
result_columns_set.insert(name);
result_columns_order.push_back(name);
}
}
}


@ -0,0 +1,33 @@
#pragma once
#include <unordered_map>
#include <memory>
#include <cstddef>
#include <string>
#include <Core/Field.h>
#include <Core/SettingsEnums.h>
#include <Common/IntervalKind.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Functions/FunctionsMiscellaneous.h>
#include <Interpreters/Aliases.h>
namespace DB
{
/// Interpolate description
struct InterpolateDescription
{
explicit InterpolateDescription(ActionsDAGPtr actions, const Aliases & aliases);
ActionsDAGPtr actions;
std::unordered_map<std::string, NameAndTypePair> required_columns_map; /// input column name -> {alias, type}
std::unordered_set<std::string> result_columns_set; /// result block columns
std::vector<std::string> result_columns_order; /// result block columns order
};
using InterpolateDescriptionPtr = std::shared_ptr<InterpolateDescription>;
}


@ -7,6 +7,7 @@
#include <Core/Field.h>
#include <Core/SettingsEnums.h>
#include <Common/IntervalKind.h>
#include <DataTypes/IDataType.h>
class Collator;


@ -4,7 +4,7 @@
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromFile.h>
#include <Parsers/formatAST.h>
#include <Common/renameat2.h>
#include <Common/atomicRename.h>
#include <Storages/StorageMaterializedView.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
@ -158,7 +158,7 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_
return;
}
if (exchange && !supportsRenameat2())
if (exchange && !supportsAtomicRename())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported");
auto & other_db = dynamic_cast<DatabaseAtomic &>(to_database);


@ -6,7 +6,7 @@
#include <Interpreters/Context.h>
#include <Common/filesystemHelpers.h>
#include <Common/quoteString.h>
#include <Common/renameat2.h>
#include <Common/atomicRename.h>
#include <Disks/IO/createReadBufferFromFileBase.h>
#include <fstream>


@ -2,7 +2,11 @@
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnsNumber.h>
#include <Interpreters/castColumn.h>
@ -17,20 +21,69 @@ namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
}
namespace
{
// A helper function to simplify comparisons of valid YYYY-MM-DD values for <,>,=
/// A helper function to simplify comparisons of valid YYYY-MM-DD values for <,>,=
inline constexpr Int64 YearMonthDayToSingleInt(Int64 year, Int64 month, Int64 day)
{
return year * 512 + month * 32 + day;
}
// Common implementation for makeDate, makeDate32
/// Common logic to handle numeric arguments like year, month, day, hour, minute, second
class FunctionWithNumericParamsBase : public IFunction
{
public:
bool isInjective(const ColumnsWithTypeAndName &) const override
{
return false; /// invalid argument values and timestamps that are out of supported range are converted into a default value
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool useDefaultImplementationForNulls() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
protected:
template <class AgrumentNames>
void checkRequiredArguments(const ColumnsWithTypeAndName & arguments, const AgrumentNames & argument_names, const size_t optional_argument_count) const
{
if (arguments.size() < argument_names.size() || arguments.size() > argument_names.size() + optional_argument_count)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires {} to {} arguments, but {} given",
getName(), argument_names.size(), argument_names.size() + optional_argument_count, arguments.size());
for (size_t i = 0; i < argument_names.size(); ++i)
{
DataTypePtr argument_type = arguments[i].type;
if (!isNumber(argument_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument '{}' for function {} must be number", std::string(argument_names[i]), getName());
}
}
template <class AgrumentNames>
void convertRequiredArguments(const ColumnsWithTypeAndName & arguments, const AgrumentNames & argument_names, Columns & converted_arguments) const
{
const DataTypePtr converted_argument_type = std::make_shared<DataTypeFloat32>();
converted_arguments.clear();
converted_arguments.reserve(arguments.size());
for (size_t i = 0; i < argument_names.size(); ++i)
{
ColumnPtr argument_column = castColumn(arguments[i], converted_argument_type);
argument_column = argument_column->convertToFullColumnIfConst();
converted_arguments.push_back(argument_column);
}
}
};
/// Common implementation for makeDate, makeDate32
template <typename Traits>
class FunctionMakeDate : public IFunction
class FunctionMakeDate : public FunctionWithNumericParamsBase
{
private:
static constexpr std::array<const char*, 3> argument_names = {"year", "month", "day"};
@ -46,45 +99,17 @@ public:
size_t getNumberOfArguments() const override { return argument_names.size(); }
bool isInjective(const ColumnsWithTypeAndName &) const override
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
return false; // {year,month,day} that are out of supported range are converted into a default value
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool useDefaultImplementationForNulls() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != argument_names.size())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Function {} requires 3 arguments, but {} given", getName(), arguments.size());
for (size_t i = 0; i < argument_names.size(); ++i)
{
DataTypePtr argument_type = arguments[i];
if (!isNumber(argument_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument '{}' for function {} must be number", std::string(argument_names[i]), getName());
}
checkRequiredArguments(arguments, argument_names, 0);
return std::make_shared<typename Traits::ReturnDataType>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const DataTypePtr converted_argument_type = std::make_shared<DataTypeFloat32>();
Columns converted_arguments;
converted_arguments.reserve(arguments.size());
for (const auto & argument : arguments)
{
ColumnPtr argument_column = castColumn(argument, converted_argument_type);
argument_column = argument_column->convertToFullColumnIfConst();
converted_arguments.push_back(argument_column);
}
convertRequiredArguments(arguments, argument_names, converted_arguments);
auto res_column = Traits::ReturnColumnType::create(input_rows_count);
auto & result_data = res_column->getData();
@ -119,7 +144,7 @@ public:
}
};
// makeDate(year, month, day)
/// makeDate(year, month, day)
struct MakeDateTraits
{
static constexpr auto name = "makeDate";
@ -128,11 +153,11 @@ struct MakeDateTraits
static constexpr auto MIN_YEAR = 1970;
static constexpr auto MAX_YEAR = 2149;
// This date has the maximum day number that fits in 16-bit uint
/// This date has the maximum day number that fits in 16-bit uint
static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 6, 6);
};
// makeDate32(year, month, day)
/// makeDate32(year, month, day)
struct MakeDate32Traits
{
static constexpr auto name = "makeDate32";
@ -144,12 +169,276 @@ struct MakeDate32Traits
static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 11, 11);
};
/// Common implementation for makeDateTime, makeDateTime64
class FunctionMakeDateTimeBase : public FunctionWithNumericParamsBase
{
protected:
static constexpr std::array<const char*, 6> argument_names = {"year", "month", "day", "hour", "minute", "second"};
public:
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
protected:
void checkRequiredArguments(const ColumnsWithTypeAndName & arguments, const size_t optional_argument_count) const
{
FunctionWithNumericParamsBase::checkRequiredArguments(arguments, argument_names, optional_argument_count);
}
void convertRequiredArguments(const ColumnsWithTypeAndName & arguments, Columns & converted_arguments) const
{
FunctionWithNumericParamsBase::convertRequiredArguments(arguments, argument_names, converted_arguments);
}
template <typename T>
static Int64 dateTime(T year, T month, T day_of_month, T hour, T minute, T second, const DateLUTImpl & lut)
{
/// Note that hour, minute and second are checked against 99 to behave consistently with parsing DateTime from String
/// E.g. "select cast('1984-01-01 99:99:99' as DateTime);" returns "1984-01-05 04:40:39"
if (unlikely(std::isnan(year) || std::isnan(month) || std::isnan(day_of_month) ||
std::isnan(hour) || std::isnan(minute) || std::isnan(second) ||
year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31 ||
hour < 0 || hour > 99 || minute < 0 || minute > 99 || second < 0 || second > 99))
return minDateTime(lut);
if (unlikely(year > DATE_LUT_MAX_YEAR))
return maxDateTime(lut);
return lut.makeDateTime(year, month, day_of_month, hour, minute, second);
}
static Int64 minDateTime(const DateLUTImpl & lut)
{
return lut.makeDateTime(DATE_LUT_MIN_YEAR - 1, 1, 1, 0, 0, 0);
}
static Int64 maxDateTime(const DateLUTImpl & lut)
{
return lut.makeDateTime(DATE_LUT_MAX_YEAR + 1, 1, 1, 23, 59, 59);
}
std::string extractTimezone(const ColumnWithTypeAndName & timezone_argument) const
{
std::string timezone;
if (!isStringOrFixedString(timezone_argument.type) || !timezone_argument.column || (timezone_argument.column->size() != 1 && !typeid_cast<const ColumnConst*>(timezone_argument.column.get())))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument 'timezone' for function {} must be const string", getName());
timezone = timezone_argument.column->getDataAt(0).toString();
return timezone;
}
};
/// makeDateTime(year, month, day, hour, minute, second, [timezone])
class FunctionMakeDateTime : public FunctionMakeDateTimeBase
{
private:
static constexpr std::array<const char*, 1> optional_argument_names = {"timezone"};
public:
static constexpr auto name = "makeDateTime";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMakeDateTime>(); }
String getName() const override { return name; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
checkRequiredArguments(arguments, optional_argument_names.size());
/// Optional timezone argument
std::string timezone;
if (arguments.size() == argument_names.size() + 1)
timezone = extractTimezone(arguments.back());
return std::make_shared<DataTypeDateTime>(timezone);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// Optional timezone argument
std::string timezone;
if (arguments.size() == argument_names.size() + 1)
timezone = extractTimezone(arguments.back());
Columns converted_arguments;
convertRequiredArguments(arguments, converted_arguments);
auto res_column = ColumnUInt32::create(input_rows_count);
auto & result_data = res_column->getData();
const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
const auto & month_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
const auto & day_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[2]).getData();
const auto & hour_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[3]).getData();
const auto & minute_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[4]).getData();
const auto & second_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[5]).getData();
const auto & date_lut = DateLUT::instance(timezone);
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto year = year_data[i];
const auto month = month_data[i];
const auto day = day_data[i];
const auto hour = hour_data[i];
const auto minute = minute_data[i];
const auto second = second_data[i];
auto date_time = dateTime(year, month, day, hour, minute, second, date_lut);
if (unlikely(date_time < 0))
date_time = 0;
else if (unlikely(date_time > 0x0ffffffffll))
date_time = 0x0ffffffffll;
result_data[i] = date_time;
}
return res_column;
}
};
/// makeDateTime64(year, month, day, hour, minute, second, [fraction], [precision], [timezone])
class FunctionMakeDateTime64 : public FunctionMakeDateTimeBase
{
private:
static constexpr std::array<const char*, 3> optional_argument_names = {"fraction", "precision", "timezone"};
static constexpr UInt8 DEFAULT_PRECISION = 3;
public:
static constexpr auto name = "makeDateTime64";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMakeDateTime64>(); }
String getName() const override { return name; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
checkRequiredArguments(arguments, optional_argument_names.size());
if (arguments.size() >= argument_names.size() + 1)
{
const auto& fraction_argument = arguments[argument_names.size()];
if (!isNumber(fraction_argument.type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument 'fraction' for function {} must be number", getName());
}
/// Optional precision argument
Int64 precision = DEFAULT_PRECISION;
if (arguments.size() >= argument_names.size() + 2)
precision = extractPrecision(arguments[argument_names.size() + 1]);
/// Optional timezone argument
std::string timezone;
if (arguments.size() == argument_names.size() + 3)
timezone = extractTimezone(arguments.back());
return std::make_shared<DataTypeDateTime64>(precision, timezone);
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
/// Optional precision argument
Int64 precision = DEFAULT_PRECISION;
if (arguments.size() >= argument_names.size() + 2)
precision = extractPrecision(arguments[argument_names.size() + 1]);
/// Optional timezone argument
std::string timezone;
if (arguments.size() == argument_names.size() + 3)
timezone = extractTimezone(arguments.back());
Columns converted_arguments;
convertRequiredArguments(arguments, converted_arguments);
/// Optional fraction argument
const ColumnVector<Float64>::Container * fraction_data = nullptr;
if (arguments.size() >= argument_names.size() + 1)
{
ColumnPtr fraction_column = castColumn(arguments[argument_names.size()], std::make_shared<DataTypeFloat64>());
fraction_column = fraction_column->convertToFullColumnIfConst();
converted_arguments.push_back(fraction_column);
fraction_data = &typeid_cast<const ColumnFloat64 &>(*converted_arguments[6]).getData();
}
auto res_column = ColumnDecimal<DateTime64>::create(input_rows_count, precision);
auto & result_data = res_column->getData();
const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
const auto & month_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
const auto & day_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[2]).getData();
const auto & hour_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[3]).getData();
const auto & minute_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[4]).getData();
const auto & second_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[5]).getData();
const auto & date_lut = DateLUT::instance(timezone);
const auto max_fraction = pow(10, precision) - 1;
const auto min_date_time = minDateTime(date_lut);
const auto max_date_time = maxDateTime(date_lut);
for (size_t i = 0; i < input_rows_count; ++i)
{
const auto year = year_data[i];
const auto month = month_data[i];
const auto day = day_data[i];
const auto hour = hour_data[i];
const auto minute = minute_data[i];
const auto second = second_data[i];
auto date_time = dateTime(year, month, day, hour, minute, second, date_lut);
double fraction = 0;
if (unlikely(date_time == min_date_time))
fraction = 0;
else if (unlikely(date_time == max_date_time))
fraction = 999999999ll;
else
{
fraction = fraction_data ? (*fraction_data)[i] : 0;
if (unlikely(std::isnan(fraction)))
{
date_time = min_date_time;
fraction = 0;
}
else if (unlikely(fraction < 0))
fraction = 0;
else if (unlikely(fraction > max_fraction))
fraction = max_fraction;
}
result_data[i] = DecimalUtils::decimalFromComponents<DateTime64>(date_time, fraction, precision);
}
return res_column;
}
private:
UInt8 extractPrecision(const ColumnWithTypeAndName & precision_argument) const
{
Int64 precision = DEFAULT_PRECISION;
if (!isNumber(precision_argument.type) || !precision_argument.column || (precision_argument.column->size() != 1 && !typeid_cast<const ColumnConst*>(precision_argument.column.get())))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Argument 'precision' for function {} must be constant number", getName());
precision = precision_argument.column->getInt(0);
if (precision < 0 || precision > 9)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Argument 'precision' for function {} must be in range [0, 9]", getName());
return precision;
}
};
}
void registerFunctionsMakeDate(FunctionFactory & factory)
{
factory.registerFunction<FunctionMakeDate<MakeDateTraits>>();
factory.registerFunction<FunctionMakeDate<MakeDate32Traits>>();
factory.registerFunction<FunctionMakeDateTime>();
factory.registerFunction<FunctionMakeDateTime64>();
}
}
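Based on the signatures spelled out in the comments above, a usage sketch of the newly registered functions (argument values are arbitrary):

``` sql
SELECT
    makeDate(2022, 4, 13)                                  AS d,    -- Date
    makeDate32(2022, 4, 13)                                AS d32,  -- Date32
    makeDateTime(2022, 4, 13, 13, 32, 56, 'UTC')           AS dt,   -- DateTime('UTC')
    makeDateTime64(2022, 4, 13, 13, 32, 56, 123, 3, 'UTC') AS dt64; -- DateTime64(3, 'UTC')
```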


@ -33,6 +33,7 @@ bool ParallelReadBuffer::addReaderToPool(std::unique_lock<std::mutex> & /*buffer
auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(std::move(reader)));
++active_working_reader;
schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); });
return true;
@ -203,11 +204,6 @@ bool ParallelReadBuffer::nextImpl()
void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
{
{
std::lock_guard lock{mutex};
++active_working_reader;
}
SCOPE_EXIT({
std::lock_guard lock{mutex};
--active_working_reader;


@ -12,7 +12,7 @@
#include <Parsers/formatAST.h>
#include <IO/ReadHelpers.h>
#include <Poco/DirectoryIterator.h>
#include <Common/renameat2.h>
#include <Common/atomicRename.h>
#include <Common/CurrentMetrics.h>
#include <base/logger_useful.h>
#include <Poco/Util/AbstractConfiguration.h>


@ -9,6 +9,7 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTWindowDefinition.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/ASTInterpolateElement.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/IColumn.h>
@ -1333,6 +1334,38 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
with_fill = true;
}
if (auto interpolate_list = select_query->interpolate())
{
NameSet select;
for (const auto & child : select_query->select()->children)
select.insert(child->getAliasOrColumnName());
/// collect columns required for interpolate expressions -
/// interpolate expression can use any available column
auto find_columns = [&step, &select](IAST * function)
{
auto f_impl = [&step, &select](IAST * fn, auto fi)
{
if (auto * ident = fn->as<ASTIdentifier>())
{
/// exclude columns from select expression - they are already available
if (select.count(ident->getColumnName()) == 0)
step.addRequiredOutput(ident->getColumnName());
return;
}
if (fn->as<ASTFunction>() || fn->as<ASTExpressionList>())
for (const auto & ch : fn->children)
fi(ch.get(), fi);
return;
};
f_impl(function, f_impl);
};
for (const auto & interpolate : interpolate_list->children)
find_columns(interpolate->as<ASTInterpolateElement>()->expr.get());
}
if (optimize_read_in_order)
{
for (auto & child : select_query->orderBy()->children)


@ -19,26 +19,27 @@ bool equals(const Field & lhs, const Field & rhs)
}
FillingRow::FillingRow(const SortDescription & sort_description) : description(sort_description)
FillingRow::FillingRow(const SortDescription & sort_description_)
: sort_description(sort_description_)
{
row.resize(description.size());
row.resize(sort_description.size());
}
bool FillingRow::operator<(const FillingRow & other) const
{
for (size_t i = 0; i < size(); ++i)
for (size_t i = 0; i < sort_description.size(); ++i)
{
if (row[i].isNull() || other[i].isNull() || equals(row[i], other[i]))
if ((*this)[i].isNull() || other.row[i].isNull() || equals(row[i], other.row[i]))
continue;
return less(row[i], other[i], getDirection(i));
return less(row[i], other.row[i], getDirection(i));
}
return false;
}
bool FillingRow::operator==(const FillingRow & other) const
{
for (size_t i = 0; i < size(); ++i)
if (!equals(row[i], other[i]))
for (size_t i = 0; i < sort_description.size(); ++i)
if (!equals(row[i], other.row[i]))
return false;
return true;
}
@ -48,16 +49,16 @@ bool FillingRow::next(const FillingRow & to_row)
size_t pos = 0;
/// Find position we need to increment for generating next row.
for (; pos < row.size(); ++pos)
if (!row[pos].isNull() && !to_row[pos].isNull() && !equals(row[pos], to_row[pos]))
for (; pos < size(); ++pos)
if (!row[pos].isNull() && !to_row.row[pos].isNull() && !equals(row[pos], to_row.row[pos]))
break;
if (pos == row.size() || less(to_row[pos], row[pos], getDirection(pos)))
if (pos == size() || less(to_row.row[pos], row[pos], getDirection(pos)))
return false;
/// If we have any 'fill_to' value at position greater than 'pos',
/// we need to generate rows up to 'fill_to' value.
for (size_t i = row.size() - 1; i > pos; --i)
for (size_t i = size() - 1; i > pos; --i)
{
if (getFillDescription(i).fill_to.isNull() || row[i].isNull())
continue;
@ -75,21 +76,22 @@ bool FillingRow::next(const FillingRow & to_row)
auto next_value = row[pos];
getFillDescription(pos).step_func(next_value);
if (less(to_row[pos], next_value, getDirection(pos)))
if (less(to_row.row[pos], next_value, getDirection(pos)))
return false;
row[pos] = next_value;
if (equals(row[pos], to_row[pos]))
if (equals(row[pos], to_row.row[pos]))
{
bool is_less = false;
for (size_t i = pos + 1; i < size(); ++i)
size_t i = pos + 1;
for (; i < size(); ++i)
{
const auto & fill_from = getFillDescription(i).fill_from;
if (!fill_from.isNull())
row[i] = fill_from;
else
row[i] = to_row[i];
is_less |= less(row[i], to_row[i], getDirection(i));
row[i] = to_row.row[i];
is_less |= less(row[i], to_row.row[i], getDirection(i));
}
return is_less;
@ -101,12 +103,12 @@ bool FillingRow::next(const FillingRow & to_row)
void FillingRow::initFromDefaults(size_t from_pos)
{
for (size_t i = from_pos; i < row.size(); ++i)
for (size_t i = from_pos; i < sort_description.size(); ++i)
row[i] = getFillDescription(i).fill_from;
}
void insertFromFillingRow(MutableColumns & filling_columns, MutableColumns & other_columns, const FillingRow & filling_row)
void insertFromFillingRow(MutableColumns & filling_columns, MutableColumns & interpolate_columns, MutableColumns & other_columns,
const FillingRow & filling_row, const Block & interpolate_block)
{
for (size_t i = 0; i < filling_columns.size(); ++i)
{
@ -116,6 +118,16 @@ void insertFromFillingRow(MutableColumns & filling_columns, MutableColumns & oth
filling_columns[i]->insert(filling_row[i]);
}
if (size_t size = interpolate_block.columns())
{
Columns columns = interpolate_block.getColumns();
for (size_t i = 0; i < size; ++i)
interpolate_columns[i]->insertFrom(*columns[i]->convertToFullColumnIfConst(), 0);
}
else
for (const auto & interpolate_column : interpolate_columns)
interpolate_column->insertDefault();
for (const auto & other_column : other_columns)
other_column->insertDefault();
}


@ -1,5 +1,6 @@
#pragma once
#include <Core/SortDescription.h>
#include <Core/InterpolateDescription.h>
#include <Columns/IColumn.h>
@ -17,7 +18,7 @@ bool equals(const Field & lhs, const Field & rhs);
class FillingRow
{
public:
FillingRow(const SortDescription & sort_description);
explicit FillingRow(const SortDescription & sort_description);
/// Generates next row according to fill 'from', 'to' and 'step' values.
bool next(const FillingRow & to_row);
@ -30,15 +31,16 @@ public:
bool operator<(const FillingRow & other) const;
bool operator==(const FillingRow & other) const;
int getDirection(size_t index) const { return description[index].direction; }
FillColumnDescription & getFillDescription(size_t index) { return description[index].fill_description; }
int getDirection(size_t index) const { return sort_description[index].direction; }
FillColumnDescription & getFillDescription(size_t index) { return sort_description[index].fill_description; }
private:
Row row;
SortDescription description;
SortDescription sort_description;
};
void insertFromFillingRow(MutableColumns & filling_columns, MutableColumns & other_columns, const FillingRow & filling_row);
void insertFromFillingRow(MutableColumns & filling_columns, MutableColumns & interpolate_columns, MutableColumns & other_columns,
const FillingRow & filling_row, const Block & interpolate_block);
void copyRowFromColumns(MutableColumns & dest, const Columns & source, size_t row_num);
}


@ -8,7 +8,7 @@
#include <Common/typeid_cast.h>
#include <Common/Macros.h>
#include <Common/randomSeed.h>
#include <Common/renameat2.h>
#include <Common/atomicRename.h>
#include <Common/hex.h>
#include <Core/Defines.h>


@ -5,6 +5,7 @@
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
@ -100,6 +101,7 @@ namespace ErrorCodes
extern const int INVALID_LIMIT_EXPRESSION;
extern const int INVALID_WITH_FILL_EXPRESSION;
extern const int ACCESS_DENIED;
extern const int UNKNOWN_IDENTIFIER;
}
/// Assumes `storage` is set and the table filter (row-level security) is not empty.
@ -780,6 +782,7 @@ static std::pair<Field, std::optional<IntervalKind>> getWithFillStep(const ASTPt
static FillColumnDescription getWithFillDescription(const ASTOrderByElement & order_by_elem, ContextPtr context)
{
FillColumnDescription descr;
if (order_by_elem.fill_from)
descr.fill_from = getWithFillFieldValue(order_by_elem.fill_from, context);
if (order_by_elem.fill_to)
@ -835,7 +838,6 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
std::shared_ptr<Collator> collator;
if (order_by_elem.collation)
collator = std::make_shared<Collator>(order_by_elem.collation->as<ASTLiteral &>().value.get<String>());
if (order_by_elem.with_fill)
{
FillColumnDescription fill_desc = getWithFillDescription(order_by_elem, context);
@ -848,6 +850,77 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
return order_descr;
}
static InterpolateDescriptionPtr getInterpolateDescription(
const ASTSelectQuery & query, const Block & source_block, const Block & result_block, const Aliases & aliases, ContextPtr context)
{
InterpolateDescriptionPtr interpolate_descr;
if (query.interpolate())
{
NamesAndTypesList source_columns;
ColumnsWithTypeAndName result_columns;
ASTPtr exprs = std::make_shared<ASTExpressionList>();
if (query.interpolate()->children.empty())
{
std::unordered_map<String, DataTypePtr> column_names;
for (const auto & column : result_block.getColumnsWithTypeAndName())
column_names[column.name] = column.type;
for (const auto & elem : query.orderBy()->children)
if (elem->as<ASTOrderByElement>()->with_fill)
column_names.erase(elem->as<ASTOrderByElement>()->children.front()->getColumnName());
for (const auto & [name, type] : column_names)
{
source_columns.emplace_back(name, type);
result_columns.emplace_back(type, name);
exprs->children.emplace_back(std::make_shared<ASTIdentifier>(name));
}
}
else
{
NameSet col_set;
for (const auto & elem : query.interpolate()->children)
{
const auto & interpolate = elem->as<ASTInterpolateElement &>();
if (const ColumnWithTypeAndName *result_block_column = result_block.findByName(interpolate.column))
{
if (!col_set.insert(result_block_column->name).second)
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Duplicate INTERPOLATE column '{}'", interpolate.column);
result_columns.emplace_back(result_block_column->type, result_block_column->name);
}
else
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Missing column '{}' as an INTERPOLATE expression target", interpolate.column);
exprs->children.emplace_back(interpolate.expr->clone());
}
col_set.clear();
for (const auto & column : source_block)
{
source_columns.emplace_back(column.name, column.type);
col_set.insert(column.name);
}
for (const auto & column : result_block)
if (col_set.count(column.name) == 0)
source_columns.emplace_back(column.name, column.type);
}
auto syntax_result = TreeRewriter(context).analyze(exprs, source_columns);
ExpressionAnalyzer analyzer(exprs, syntax_result, context);
ActionsDAGPtr actions = analyzer.getActionsDAG(true);
ActionsDAGPtr conv_dag = ActionsDAG::makeConvertingActions(actions->getResultColumns(),
result_columns, ActionsDAG::MatchColumnsMode::Position, true);
ActionsDAGPtr merge_dag = ActionsDAG::merge(std::move(*actions->clone()), std::move(*conv_dag));
interpolate_descr = std::make_shared<InterpolateDescription>(merge_dag, aliases);
}
return interpolate_descr;
}
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
{
SortDescription order_descr;
@ -2515,7 +2588,9 @@ void InterpreterSelectQuery::executeWithFill(QueryPlan & query_plan)
if (fill_descr.empty())
return;
auto filling_step = std::make_unique<FillingStep>(query_plan.getCurrentDataStream(), std::move(fill_descr));
InterpolateDescriptionPtr interpolate_descr =
getInterpolateDescription(query, source_header, result_header, syntax_analyzer_result->aliases, context);
auto filling_step = std::make_unique<FillingStep>(query_plan.getCurrentDataStream(), std::move(fill_descr), interpolate_descr);
query_plan.addStep(std::move(filling_step));
}
}
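
For orientation, a minimal standalone sketch of what the interpolate description ultimately buys at execution time: WITH FILL generates the missing key values, and INTERPOLATE computes the accompanying column from the previous output row instead of defaulting it. Toy types and a made-up `+ 10` rule stand in for the real ActionsDAG; this is plain C++, not ClickHouse code.

#include <iostream>
#include <vector>

// Toy row: a fill key plus one interpolated value (hypothetical names).
struct Row { long key; long value; };

// Fill integer gaps in `key` with step 1; for generated rows, derive `value`
// from the previous output row (here: previous value + 10, standing in for the
// user-supplied INTERPOLATE expression).
std::vector<Row> fillAndInterpolate(const std::vector<Row> & input)
{
    std::vector<Row> out;
    for (const Row & row : input)
    {
        while (!out.empty() && out.back().key + 1 < row.key)
            out.push_back({out.back().key + 1, out.back().value + 10});
        out.push_back(row);
    }
    return out;
}

int main()
{
    // Analogue of: ORDER BY key WITH FILL INTERPOLATE (value AS value + 10)
    for (const Row & r : fillAndInterpolate({{1, 100}, {4, 200}}))
        std::cout << r.key << '\t' << r.value << '\n';
    // output: 1 100, 2 110, 3 120, 4 200 (keys 2 and 3 are generated)
}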

View File

@ -250,21 +250,6 @@ bool MergeTreeTransaction::rollback() noexcept
/// Discard changes in active parts set
/// Remove parts that were created, restore parts that were removed (except parts that were created by this transaction too)
for (const auto & part : parts_to_remove)
{
if (part->version.isRemovalTIDLocked())
{
/// Don't need to remove part from working set if it was created and removed by this transaction
assert(part->version.removal_tid_lock == tid.getHash());
continue;
}
/// FIXME do not lock removal_tid when rolling back part creation, it's ugly
const_cast<MergeTreeData &>(part->storage).removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part}, true);
}
for (const auto & part : parts_to_activate)
if (part->version.getCreationTID() != tid)
const_cast<MergeTreeData &>(part->storage).restoreAndActivatePart(part);
/// Kind of optimization: cleanup thread can remove these parts immediately
for (const auto & part : parts_to_remove)
@ -274,6 +259,18 @@ bool MergeTreeTransaction::rollback() noexcept
part->appendCSNToVersionMetadata(VersionMetadata::CREATION);
}
for (const auto & part : parts_to_remove)
{
/// NOTE It's possible that part is already removed from working set in the same transaction
/// (or, even worse, in a separate non-transactional query with PrehistoricTID),
/// but it's not a problem: removePartsFromWorkingSet(...) will do nothing in this case.
const_cast<MergeTreeData &>(part->storage).removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part}, true);
}
for (const auto & part : parts_to_activate)
if (part->version.getCreationTID() != tid)
const_cast<MergeTreeData &>(part->storage).restoreAndActivatePart(part);
for (const auto & part : parts_to_activate)
{
/// Clear removal_tid from version metadata file, so we will not need to distinguish TIDs that were not committed

View File

@ -105,7 +105,7 @@ void getProfileEvents(
{"value", std::make_shared<DataTypeInt64>()},
};
ColumnsWithTypeAndName temp_columns;
for (auto const & name_and_type : column_names_and_types)
temp_columns.emplace_back(name_and_type.type, name_and_type.name);

View File

@ -9,6 +9,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTQueryParameter.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/quoteString.h>
#include <IO/WriteHelpers.h>
@ -134,7 +135,8 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &
static bool needVisitChild(const ASTPtr & child)
{
return !(child->as<ASTSelectQuery>() || child->as<ASTTableExpression>());
/// exclude interpolate elements - they are not subject to normalization and will be processed in the filling transform
return !(child->as<ASTSelectQuery>() || child->as<ASTTableExpression>() || child->as<ASTInterpolateElement>());
}
/// special visitChildren() for ASTSelectQuery

View File

@ -7,6 +7,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTInterpolateElement.h>
namespace DB
{
@ -46,7 +47,7 @@ bool RequiredSourceColumnsMatcher::needChildVisit(const ASTPtr & node, const AST
return false;
/// Processed. Do not need children.
if (node->as<ASTTableExpression>() || node->as<ASTArrayJoin>() || node->as<ASTSelectQuery>())
if (node->as<ASTTableExpression>() || node->as<ASTArrayJoin>() || node->as<ASTSelectQuery>() || node->as<ASTInterpolateElement>())
return false;
if (const auto * f = node->as<ASTFunction>())
@ -114,15 +115,42 @@ void RequiredSourceColumnsMatcher::visit(const ASTPtr & ast, Data & data)
void RequiredSourceColumnsMatcher::visit(const ASTSelectQuery & select, const ASTPtr &, Data & data)
{
NameSet select_columns;
/// special case for top-level SELECT items: they are public
for (auto & node : select.select()->children)
{
select_columns.insert(node->getAliasOrColumnName());
if (const auto * identifier = node->as<ASTIdentifier>())
data.addColumnIdentifier(*identifier);
else
data.addColumnAliasIfAny(*node);
}
if (auto interpolate_list = select.interpolate())
{
auto find_columns = [&data, &select_columns](IAST * function)
{
auto f_impl = [&data, &select_columns](IAST * fn, auto fi)
{
if (auto * ident = fn->as<ASTIdentifier>())
{
if (select_columns.count(ident->getColumnName()) == 0)
data.addColumnIdentifier(*ident);
return;
}
if (fn->as<ASTFunction>() || fn->as<ASTExpressionList>())
for (const auto & ch : fn->children)
fi(ch.get(), fi);
return;
};
f_impl(function, f_impl);
};
for (const auto & interpolate : interpolate_list->children)
find_columns(interpolate->as<ASTInterpolateElement>()->expr.get());
}
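
A reduced sketch of the traversal above, using a toy expression tree instead of IAST: identifiers referenced by INTERPOLATE expressions are registered as required source columns only when the SELECT list does not already provide them.

#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>

// Toy AST node: either an identifier (no children) or a function with children.
struct Node
{
    std::string name;                 // identifier name or function name
    bool is_identifier = false;
    std::vector<std::shared_ptr<Node>> children;
};

// Collect identifiers used by an INTERPOLATE expression that the SELECT list
// does not already produce (mirrors the recursive f_impl lambda above).
void collectRequired(const Node & node, const std::set<std::string> & select_columns,
                     std::set<std::string> & required)
{
    if (node.is_identifier)
    {
        if (!select_columns.count(node.name))
            required.insert(node.name);
        return;
    }
    for (const auto & child : node.children)
        collectRequired(*child, select_columns, required);
}

int main()
{
    // INTERPOLATE (y AS y + z): expression tree plus(y, z)
    auto y = std::make_shared<Node>(Node{"y", true, {}});
    auto z = std::make_shared<Node>(Node{"z", true, {}});
    Node plus{"plus", false, {y, z}};

    std::set<std::string> required;
    collectRequired(plus, /*select_columns=*/{"x", "y"}, required);
    for (const auto & name : required)
        std::cout << name << '\n';    // prints only "z"
}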
if (const auto & with = select.with())
{
for (auto & node : with->children)

View File

@ -212,7 +212,7 @@ void TransactionLog::runUpdatingThread()
if (stop_flag.load())
return;
if (!zookeeper)
if (getZooKeeper()->expired())
{
auto new_zookeeper = global_context->getZooKeeper();
std::lock_guard lock{mutex};
@ -222,16 +222,11 @@ void TransactionLog::runUpdatingThread()
loadNewEntries();
removeOldEntries();
}
catch (const Coordination::Exception & e)
catch (const Coordination::Exception &)
{
tryLogCurrentException(log);
/// TODO better backoff
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
if (Coordination::isHardwareError(e.code))
{
std::lock_guard lock{mutex};
zookeeper.reset();
}
log_updated_event->set();
}
catch (...)
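
The updating thread now re-creates its session whenever the current one reports itself expired, rather than only when the previous exception handler reset the pointer. A minimal sketch of that retry shape with a hypothetical client type (not the real Coordination API):

#include <chrono>
#include <memory>
#include <thread>

// Hypothetical minimal client, standing in for the ZooKeeper wrapper.
struct Client
{
    bool expired() const { return is_expired; }
    bool is_expired = false;
};

std::shared_ptr<Client> makeClient() { return std::make_shared<Client>(); }

// One iteration of the updating loop: refresh the session if it expired,
// then do the periodic work; on failure just wait and retry next round.
void runOnce(std::shared_ptr<Client> & client)
{
    try
    {
        if (!client || client->expired())
            client = makeClient();      // re-establish the session

        // ... loadNewEntries(); removeOldEntries(); would go here ...
    }
    catch (...)
    {
        // log and back off; the next iteration retries with a fresh session if needed
        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
    }
}

int main()
{
    std::shared_ptr<Client> client;
    runOnce(client);                    // first call creates the session
    client->is_expired = true;
    runOnce(client);                    // expired session is replaced
}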

View File

@ -32,6 +32,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/queryToString.h>
#include <DataTypes/NestedUtils.h>
@ -420,7 +421,8 @@ void renameDuplicatedColumns(const ASTSelectQuery * select_query)
/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need fewer columns in the result.
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are impossible.
void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
/// Also remove all INTERPOLATE columns which are not in SELECT anymore.
void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
{
ASTs & elements = select_query->select()->children;
@ -449,6 +451,8 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query,
ASTs new_elements;
new_elements.reserve(elements.size());
NameSet remove_columns;
for (const auto & elem : elements)
{
String name = elem->getAliasOrColumnName();
@ -465,6 +469,8 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query,
}
else
{
remove_columns.insert(name);
ASTFunction * func = elem->as<ASTFunction>();
/// Never remove untuple. It's result column may be in required columns.
@ -478,6 +484,24 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query,
}
}
if (select_query->interpolate())
{
auto & children = select_query->interpolate()->children;
if (!children.empty())
{
for (auto it = children.begin(); it != children.end();)
{
if (remove_columns.count((*it)->as<ASTInterpolateElement>()->column))
it = select_query->interpolate()->children.erase(it);
else
++it;
}
if (children.empty())
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, nullptr);
}
}
elements = std::move(new_elements);
}
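
The new remove_columns set is what allows INTERPOLATE entries to be dropped together with their removed SELECT columns. A small standalone sketch of that pruning pass over plain containers (toy types, not the real AST):

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct InterpolateEntry { std::string column; std::string expr; };

// Drop INTERPOLATE entries whose target column is no longer selected;
// returns true if the whole clause should be removed (it became empty).
bool pruneInterpolate(std::vector<InterpolateEntry> & entries, const std::set<std::string> & removed_columns)
{
    std::vector<InterpolateEntry> kept;
    for (const auto & entry : entries)
        if (!removed_columns.count(entry.column))
            kept.push_back(entry);
    entries = std::move(kept);
    return entries.empty();
}

int main()
{
    std::vector<InterpolateEntry> interpolate{{"a", "a + 1"}, {"b", "b * 2"}};
    bool drop_clause = pruneInterpolate(interpolate, /*removed_columns=*/{"b"});
    std::cout << interpolate.size() << ' ' << drop_clause << '\n';   // prints "1 0"
}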

View File

@ -116,6 +116,8 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes()
{"type", std::move(type_enum)},
{"event_date", std::make_shared<DataTypeDate>()},
{"event_time", std::make_shared<DataTypeDateTime64>(6)},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"query_id", std::make_shared<DataTypeString>()},
{"address", DataTypeFactory::instance().get("IPv6")},
{"port", std::make_shared<DataTypeUInt16>()},
{"session_id", std::make_shared<DataTypeInt64>()},
@ -164,6 +166,8 @@ void ZooKeeperLogElement::appendToBlock(MutableColumns & columns) const
auto event_time_seconds = event_time / 1000000;
columns[i++]->insert(DateLUT::instance().toDayNum(event_time_seconds).toUnderType());
columns[i++]->insert(event_time);
columns[i++]->insert(thread_id);
columns[i++]->insert(query_id);
columns[i++]->insertData(IPv6ToBinary(address.host()).data(), 16);
columns[i++]->insert(address.port());
columns[i++]->insert(session_id);

View File

@ -22,6 +22,8 @@ struct ZooKeeperLogElement
Type type = UNKNOWN;
Decimal64 event_time = 0;
UInt64 thread_id = 0;
String query_id;
Poco::Net::SocketAddress address;
Int64 session_id = 0;

View File

@ -0,0 +1,16 @@
#include <Columns/Collator.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Common/SipHash.h>
#include <IO/Operators.h>
namespace DB
{
void ASTInterpolateElement::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
settings.ostr << column << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "");
expr->formatImpl(settings, state, frame);
}
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <Parsers/IAST.h>
namespace DB
{
class ASTInterpolateElement : public IAST
{
public:
String column;
ASTPtr expr;
String getID(char delim) const override { return String("InterpolateElement") + delim + "(column " + column + ")"; }
ASTPtr clone() const override
{
auto clone = std::make_shared<ASTInterpolateElement>(*this);
clone->expr = clone->expr->clone();
clone->children.clear();
clone->children.push_back(clone->expr);
return clone;
}
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};
}
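
clone() follows the usual pattern for AST nodes that own a sub-expression: copy the node, deep-copy the expression, and rebuild children so the copy does not alias the original subtree. A standalone sketch with a toy node type (not IAST):

#include <cassert>
#include <memory>
#include <string>
#include <vector>

struct ToyNode
{
    std::string column;
    std::shared_ptr<ToyNode> expr;                      // owned sub-expression
    std::vector<std::shared_ptr<ToyNode>> children;

    std::shared_ptr<ToyNode> clone() const
    {
        auto copy = std::make_shared<ToyNode>(*this);   // shallow copy first
        if (copy->expr)
            copy->expr = copy->expr->clone();           // deep-copy the sub-expression
        copy->children.clear();
        if (copy->expr)
            copy->children.push_back(copy->expr);       // re-register it as a child
        return copy;
    }
};

int main()
{
    auto expr = std::make_shared<ToyNode>();
    expr->column = "x + 1";
    ToyNode elem{"x", expr, {expr}};

    auto copy = elem.clone();
    assert(copy->expr != elem.expr);                    // no aliasing of the subtree
    assert(copy->children.front() == copy->expr);       // children point into the copy
}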

View File

@ -37,4 +37,5 @@ public:
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};
}

View File

@ -129,6 +129,17 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
s.one_line
? orderBy()->formatImpl(s, state, frame)
: orderBy()->as<ASTExpressionList &>().formatImplMultiline(s, state, frame);
if (interpolate())
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "INTERPOLATE" << (s.hilite ? hilite_none : "");
if (!interpolate()->children.empty())
{
s.ostr << " (";
interpolate()->formatImpl(s, state, frame);
s.ostr << " )";
}
}
}
if (limitByLength())

View File

@ -32,7 +32,8 @@ public:
LIMIT_BY,
LIMIT_OFFSET,
LIMIT_LENGTH,
SETTINGS
SETTINGS,
INTERPOLATE
};
static String expressionToString(Expression expr)
@ -69,6 +70,8 @@ public:
return "LIMIT LENGTH";
case Expression::SETTINGS:
return "SETTINGS";
case Expression::INTERPOLATE:
return "INTERPOLATE";
}
return "";
}
@ -91,21 +94,22 @@ public:
ASTPtr & refWhere() { return getExpression(Expression::WHERE); }
ASTPtr & refHaving() { return getExpression(Expression::HAVING); }
ASTPtr with() const { return getExpression(Expression::WITH); }
ASTPtr select() const { return getExpression(Expression::SELECT); }
ASTPtr tables() const { return getExpression(Expression::TABLES); }
ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); }
ASTPtr where() const { return getExpression(Expression::WHERE); }
ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); }
ASTPtr having() const { return getExpression(Expression::HAVING); }
ASTPtr window() const { return getExpression(Expression::WINDOW); }
ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); }
ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); }
ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); }
ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); }
ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); }
ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); }
ASTPtr settings() const { return getExpression(Expression::SETTINGS); }
const ASTPtr with() const { return getExpression(Expression::WITH); }
const ASTPtr select() const { return getExpression(Expression::SELECT); }
const ASTPtr tables() const { return getExpression(Expression::TABLES); }
const ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); }
const ASTPtr where() const { return getExpression(Expression::WHERE); }
const ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); }
const ASTPtr having() const { return getExpression(Expression::HAVING); }
const ASTPtr window() const { return getExpression(Expression::WINDOW); }
const ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); }
const ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); }
const ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); }
const ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); }
const ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); }
const ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); }
const ASTPtr settings() const { return getExpression(Expression::SETTINGS); }
const ASTPtr interpolate() const { return getExpression(Expression::INTERPOLATE); }
bool hasFiltration() const { return where() || prewhere() || having(); }

View File

@ -16,6 +16,7 @@
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/ASTQualifiedAsterisk.h>
#include <Parsers/ASTQueryParameter.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
@ -2316,6 +2317,35 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
return true;
}
bool ParserInterpolateElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword as("AS");
ParserExpression element_p;
ParserIdentifier ident_p;
ASTPtr ident;
if (!ident_p.parse(pos, ident, expected))
return false;
ASTPtr expr;
if (as.ignore(pos, expected))
{
if (!element_p.parse(pos, expr, expected))
return false;
}
else
expr = ident;
auto elem = std::make_shared<ASTInterpolateElement>();
elem->column = ident->getColumnName();
elem->expr = expr;
elem->children.push_back(expr);
node = elem;
return true;
}
bool ParserFunctionWithKeyValueArguments::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserIdentifier id_parser;

View File

@ -420,6 +420,15 @@ protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
/** Element of INTERPOLATE expression
*/
class ParserInterpolateElement : public IParserBase
{
protected:
const char * getName() const override { return "element of INTERPOLATE expression"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
/** Parser for function with arguments like KEY VALUE (space separated)
* no commas allowed, just space-separated pairs.
*/

View File

@ -763,6 +763,13 @@ bool ParserOrderByExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected &
}
bool ParserInterpolateExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
return ParserList(std::make_unique<ParserInterpolateElement>(), std::make_unique<ParserToken>(TokenType::Comma), true)
.parse(pos, node, expected);
}
bool ParserTTLExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
return ParserList(std::make_unique<ParserTTLElement>(), std::make_unique<ParserToken>(TokenType::Comma), false)

View File

@ -517,6 +517,12 @@ protected:
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserInterpolateExpressionList : public IParserBase
{
protected:
const char * getName() const override { return "interpolate expression"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
/// Parser for key-value pair, where value can be list of pairs.
class ParserKeyValuePair : public IParserBase

View File

@ -10,6 +10,10 @@
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ParserWithElement.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/ASTIdentifier.h>
namespace DB
@ -59,12 +63,14 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserKeyword s_rows("ROWS");
ParserKeyword s_first("FIRST");
ParserKeyword s_next("NEXT");
ParserKeyword s_interpolate("INTERPOLATE");
ParserNotEmptyExpressionList exp_list(false);
ParserNotEmptyExpressionList exp_list_for_with_clause(false);
ParserNotEmptyExpressionList exp_list_for_select_clause(true); /// Allows aliases without AS keyword.
ParserExpressionWithOptionalAlias exp_elem(false);
ParserOrderByExpressionList order_list;
ParserInterpolateExpressionList interpolate_list;
ParserToken open_bracket(TokenType::OpeningRoundBracket);
ParserToken close_bracket(TokenType::ClosingRoundBracket);
@ -78,6 +84,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ASTPtr having_expression;
ASTPtr window_list;
ASTPtr order_expression_list;
ASTPtr interpolate_expression_list;
ASTPtr limit_by_length;
ASTPtr limit_by_offset;
ASTPtr limit_by_expression_list;
@ -239,6 +246,23 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (!order_list.parse(pos, order_expression_list, expected))
return false;
/// If any ORDER BY element has WITH FILL, parse an optional INTERPOLATE list
if (std::any_of(order_expression_list->children.begin(), order_expression_list->children.end(),
[](auto & child) { return child->template as<ASTOrderByElement>()->with_fill; }))
{
if (s_interpolate.ignore(pos, expected))
{
if (open_bracket.ignore(pos, expected))
{
if (!interpolate_list.parse(pos, interpolate_expression_list, expected))
return false;
if (!close_bracket.ignore(pos, expected))
return false;
} else
interpolate_expression_list = std::make_shared<ASTExpressionList>();
}
}
}
/// This is needed for TOP expression, because it can also use WITH TIES.
@ -430,6 +454,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, std::move(limit_offset));
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length));
select_query->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(settings));
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, std::move(interpolate_expression_list));
return true;
}
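
Two details of the grammar are easy to miss: the INTERPOLATE clause is only attempted when at least one ORDER BY element has WITH FILL, and a bare INTERPOLATE with no parentheses is stored as an empty list, which getInterpolateDescription later expands to all remaining non-fill columns. A simplified sketch of that decision over toy structures (not the real parser classes):

#include <algorithm>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct OrderByElem { std::string column; bool with_fill = false; };

// Returns std::nullopt when no INTERPOLATE clause applies; an empty vector means
// "INTERPOLATE" without a column list (interpolate all remaining columns).
std::optional<std::vector<std::string>> interpolateColumns(
    const std::vector<OrderByElem> & order_by,
    bool has_interpolate_keyword,
    const std::vector<std::string> & explicit_list)
{
    bool any_with_fill = std::any_of(order_by.begin(), order_by.end(),
                                     [](const OrderByElem & e) { return e.with_fill; });
    if (!any_with_fill || !has_interpolate_keyword)
        return std::nullopt;
    return explicit_list;   // may be empty: bare INTERPOLATE
}

int main()
{
    std::vector<OrderByElem> order_by{{"ts", true}, {"id", false}};
    auto cols = interpolateColumns(order_by, /*has_interpolate_keyword=*/true, /*explicit_list=*/{});
    std::cout << (cols ? "interpolate all remaining columns" : "no interpolation") << '\n';
}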

View File

@ -28,9 +28,9 @@ static ITransformingStep::Traits getTraits()
};
}
FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_description_)
FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_description_, InterpolateDescriptionPtr interpolate_description_)
: ITransformingStep(input_stream_, FillingTransform::transformHeader(input_stream_.header, sort_description_), getTraits())
, sort_description(std::move(sort_description_))
, sort_description(std::move(sort_description_)), interpolate_description(interpolate_description_)
{
if (!input_stream_.has_single_port)
throw Exception("FillingStep expects single input", ErrorCodes::LOGICAL_ERROR);
@ -41,7 +41,7 @@ void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr
{
bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals;
return std::make_shared<FillingTransform>(header, sort_description, on_totals);
return std::make_shared<FillingTransform>(header, sort_description, std::move(interpolate_description), on_totals);
});
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Core/SortDescription.h>
#include <Core/InterpolateDescription.h>
namespace DB
{
@ -9,7 +10,7 @@ namespace DB
class FillingStep : public ITransformingStep
{
public:
FillingStep(const DataStream & input_stream_, SortDescription sort_description_);
FillingStep(const DataStream & input_stream_, SortDescription sort_description_, InterpolateDescriptionPtr interpolate_description_);
String getName() const override { return "Filling"; }
@ -22,6 +23,7 @@ public:
private:
SortDescription sort_description;
InterpolateDescriptionPtr interpolate_description;
};
}

View File

@ -3,6 +3,7 @@
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/TotalsHavingStep.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Processors/QueryPlan/WindowStep.h>
#include <Common/typeid_cast.h>
namespace DB::QueryPlanOptimizations
@ -66,6 +67,11 @@ size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
if (typeid_cast<const TotalsHavingStep *>(child.get()))
return 0;
/// Disable for WindowStep.
/// TODO: we can push down limit in some cases if increase the limit value.
if (typeid_cast<const WindowStep *>(child.get()))
return 0;
/// Now we should decide if pushing down limit possible for this step.
const auto & transform_traits = transforming->getTransformTraits();
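
The guard exists because a window function is computed over its whole frame, so trimming rows before the WindowStep changes its output; pushing the limit down would only be safe if the limit value were enlarged accordingly, which is the TODO above. A tiny numeric illustration in plain C++ (a stand-in for sum(x) OVER (), not query-plan code):

#include <iostream>
#include <numeric>
#include <vector>

// sum(x) OVER () in spirit: every output row carries the total over the whole input.
std::vector<int> windowTotal(const std::vector<int> & rows)
{
    int total = std::accumulate(rows.begin(), rows.end(), 0);
    return std::vector<int>(rows.size(), total);
}

int main()
{
    std::vector<int> rows{1, 2, 3, 4};

    // Window first, then LIMIT 2: two rows, each equal to 10.
    auto correct = windowTotal(rows);
    std::cout << correct[0] << ' ' << correct[1] << '\n';

    // LIMIT 2 pushed below the window: the window sees only {1, 2},
    // so both rows become 3, a different result, hence the guard above.
    std::vector<int> limited(rows.begin(), rows.begin() + 2);
    auto wrong = windowTotal(limited);
    std::cout << wrong[0] << ' ' << wrong[1] << '\n';
}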

View File

@ -18,7 +18,7 @@ namespace ErrorCodes
extern const int INVALID_WITH_FILL_EXPRESSION;
}
Block FillingTransform::transformHeader(Block header, const SortDescription & sort_description)
Block FillingTransform::transformHeader(Block header, const SortDescription & sort_description/*, const InterpolateDescription & interpolate_description*/)
{
NameSet sort_keys;
for (const auto & key : sort_description)
@ -138,19 +138,28 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &
}
FillingTransform::FillingTransform(
const Block & header_, const SortDescription & sort_description_, bool on_totals_)
: ISimpleTransform(header_, transformHeader(header_, sort_description_), true)
, sort_description(sort_description_)
, on_totals(on_totals_)
, filling_row(sort_description_)
, next_row(sort_description_)
const Block & header_, const SortDescription & sort_description_, InterpolateDescriptionPtr interpolate_description_, bool on_totals_)
: ISimpleTransform(header_, transformHeader(header_, sort_description_), true)
, sort_description(sort_description_)
, interpolate_description(interpolate_description_)
, on_totals(on_totals_)
, filling_row(sort_description_)
, next_row(sort_description_)
{
if (on_totals)
return;
if (interpolate_description)
interpolate_actions = std::make_shared<ExpressionActions>(interpolate_description->actions);
std::vector<bool> is_fill_column(header_.columns());
for (size_t i = 0, size = sort_description.size(); i < size; ++i)
{
if (interpolate_description && interpolate_description->result_columns_set.count(sort_description[i].column_name))
throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
"Column '{}' is participating in ORDER BY ... WITH FILL expression and can't be INTERPOLATE output",
sort_description[i].column_name);
size_t block_position = header_.getPositionByName(sort_description[i].column_name);
is_fill_column[block_position] = true;
fill_column_positions.push_back(block_position);
@ -176,9 +185,23 @@ FillingTransform::FillingTransform(
if (!unique_positions.insert(pos).second)
throw Exception("Multiple WITH FILL for identical expressions is not supported in ORDER BY", ErrorCodes::INVALID_WITH_FILL_EXPRESSION);
for (size_t i = 0; i < header_.columns(); ++i)
if (!is_fill_column[i])
other_column_positions.push_back(i);
size_t idx = 0;
for (const ColumnWithTypeAndName & column : header_.getColumnsWithTypeAndName())
{
if (interpolate_description)
if (const auto & p = interpolate_description->required_columns_map.find(column.name);
p != interpolate_description->required_columns_map.end())
input_positions.emplace_back(idx, p->second);
if (!is_fill_column[idx] && !(interpolate_description && interpolate_description->result_columns_set.count(column.name)))
other_column_positions.push_back(idx);
++idx;
}
if (interpolate_description)
for (const auto & name : interpolate_description->result_columns_order)
interpolate_column_positions.push_back(header_.getPositionByName(name));
}
IProcessor::Status FillingTransform::prepare()
@ -207,37 +230,90 @@ void FillingTransform::transform(Chunk & chunk)
return;
Columns old_fill_columns;
Columns old_interpolate_columns;
Columns old_other_columns;
MutableColumns res_fill_columns;
MutableColumns res_interpolate_columns;
MutableColumns res_other_columns;
auto init_columns_by_positions = [](const Columns & old_columns, Columns & new_columns,
MutableColumns & new_mutable_columns, const Positions & positions)
std::vector<std::pair<MutableColumns *, size_t>> res_map;
res_map.resize(input.getHeader().columns());
auto init_columns_by_positions = [&res_map](const Columns & old_columns, Columns & new_columns,
MutableColumns & new_mutable_columns, const Positions & positions)
{
for (size_t pos : positions)
{
auto old_column = old_columns[pos]->convertToFullColumnIfConst();
new_columns.push_back(old_column);
res_map[pos] = {&new_mutable_columns, new_mutable_columns.size()};
new_mutable_columns.push_back(old_column->cloneEmpty()->assumeMutable());
}
};
Block interpolate_block;
auto interpolate = [&]()
{
if (interpolate_description)
{
interpolate_block.clear();
if (!input_positions.empty())
{
/// populate the calculation block with the required columns, using values from the previous row
for (const auto & [col_pos, name_type] : input_positions)
{
MutableColumnPtr column = name_type.type->createColumn();
auto [res_columns, pos] = res_map[col_pos];
size_t size = (*res_columns)[pos]->size();
if (size == 0) /// this is the first row in current chunk
{
/// take value from last row of previous chunk if exists, else use default
if (last_row.size() > col_pos && !last_row[col_pos]->empty())
column->insertFrom(*last_row[col_pos], 0);
else
column->insertDefault();
}
else /// take value from previous row of current chunk
column->insertFrom(*(*res_columns)[pos], size - 1);
interpolate_block.insert({std::move(column), name_type.type, name_type.name});
}
interpolate_actions->execute(interpolate_block);
}
else /// all INTERPOLATE expressions are constants
{
size_t n = 1;
interpolate_actions->execute(interpolate_block, n);
}
}
};
if (generate_suffix)
{
const auto & empty_columns = input.getHeader().getColumns();
init_columns_by_positions(empty_columns, old_fill_columns, res_fill_columns, fill_column_positions);
init_columns_by_positions(empty_columns, old_interpolate_columns, res_interpolate_columns, interpolate_column_positions);
init_columns_by_positions(empty_columns, old_other_columns, res_other_columns, other_column_positions);
if (first)
filling_row.initFromDefaults();
if (should_insert_first && filling_row < next_row)
insertFromFillingRow(res_fill_columns, res_other_columns, filling_row);
{
interpolate();
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block);
}
interpolate();
while (filling_row.next(next_row))
insertFromFillingRow(res_fill_columns, res_other_columns, filling_row);
{
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block);
interpolate();
}
setResultColumns(chunk, res_fill_columns, res_other_columns);
setResultColumns(chunk, res_fill_columns, res_interpolate_columns, res_other_columns);
return;
}
@ -245,6 +321,7 @@ void FillingTransform::transform(Chunk & chunk)
auto old_columns = chunk.detachColumns();
init_columns_by_positions(old_columns, old_fill_columns, res_fill_columns, fill_column_positions);
init_columns_by_positions(old_columns, old_interpolate_columns, res_interpolate_columns, interpolate_column_positions);
init_columns_by_positions(old_columns, old_other_columns, res_other_columns, other_column_positions);
if (first)
@ -258,7 +335,10 @@ void FillingTransform::transform(Chunk & chunk)
{
filling_row.initFromDefaults(i);
if (less(fill_from, current_value, filling_row.getDirection(i)))
insertFromFillingRow(res_fill_columns, res_other_columns, filling_row);
{
interpolate();
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block);
}
break;
}
filling_row[i] = current_value;
@ -284,31 +364,72 @@ void FillingTransform::transform(Chunk & chunk)
/// A case, when at previous step row was initialized from defaults 'fill_from' values
/// and probably we need to insert it to block.
if (should_insert_first && filling_row < next_row)
insertFromFillingRow(res_fill_columns, res_other_columns, filling_row);
{
interpolate();
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block);
}
/// Insert generated filling row to block, while it is less than current row in block.
interpolate();
while (filling_row.next(next_row))
insertFromFillingRow(res_fill_columns, res_other_columns, filling_row);
{
insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block);
interpolate();
}
copyRowFromColumns(res_fill_columns, old_fill_columns, row_ind);
copyRowFromColumns(res_interpolate_columns, old_interpolate_columns, row_ind);
copyRowFromColumns(res_other_columns, old_other_columns, row_ind);
}
setResultColumns(chunk, res_fill_columns, res_other_columns);
saveLastRow(res_fill_columns, res_interpolate_columns, res_other_columns);
setResultColumns(chunk, res_fill_columns, res_interpolate_columns, res_other_columns);
}
void FillingTransform::setResultColumns(Chunk & chunk, MutableColumns & fill_columns, MutableColumns & other_columns) const
void FillingTransform::setResultColumns(Chunk & chunk, MutableColumns & fill_columns, MutableColumns & interpolate_columns, MutableColumns & other_columns) const
{
MutableColumns result_columns(fill_columns.size() + other_columns.size());
MutableColumns result_columns(fill_columns.size() + interpolate_columns.size() + other_columns.size());
/// fill_columns always non-empty.
size_t num_rows = fill_columns[0]->size();
for (size_t i = 0, size = fill_columns.size(); i < size; ++i)
result_columns[fill_column_positions[i]] = std::move(fill_columns[i]);
for (size_t i = 0, size = interpolate_columns.size(); i < size; ++i)
result_columns[interpolate_column_positions[i]] = std::move(interpolate_columns[i]);
for (size_t i = 0, size = other_columns.size(); i < size; ++i)
result_columns[other_column_positions[i]] = std::move(other_columns[i]);
chunk.setColumns(std::move(result_columns), num_rows);
}
void FillingTransform::saveLastRow(const MutableColumns & fill_columns, const MutableColumns & interpolate_columns, const MutableColumns & other_columns)
{
last_row.clear();
last_row.resize(fill_columns.size() + interpolate_columns.size() + other_columns.size());
size_t num_rows = fill_columns[0]->size();
if (num_rows == 0)
return;
for (size_t i = 0, size = fill_columns.size(); i < size; ++i)
{
auto column = fill_columns[i]->cloneEmpty();
column->insertFrom(*fill_columns[i], num_rows - 1);
last_row[fill_column_positions[i]] = std::move(column);
}
for (size_t i = 0, size = interpolate_columns.size(); i < size; ++i)
{
auto column = interpolate_columns[i]->cloneEmpty();
column->insertFrom(*interpolate_columns[i], num_rows - 1);
last_row[interpolate_column_positions[i]] = std::move(column);
}
for (size_t i = 0, size = other_columns.size(); i < size; ++i)
{
auto column = other_columns[i]->cloneEmpty();
column->insertFrom(*other_columns[i], num_rows - 1);
last_row[other_column_positions[i]] = std::move(column);
}
}
}
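
saveLastRow is needed because the transform works chunk by chunk while INTERPOLATE expressions are fed with the previous output row: at the start of a new chunk there is no previous row inside the chunk, so the final row of the preceding chunk is carried over. A reduced sketch of that hand-off, with a `previous + 1` rule standing in for the interpolate actions:

#include <iostream>
#include <optional>
#include <vector>

// Produce num_rows values where each value is "previous output value + 1".
// The previous value comes either from this chunk or, for the first row of a
// chunk, from last_row, the analogue of FillingTransform::saveLastRow.
std::vector<int> processChunk(size_t num_rows, std::optional<int> & last_row)
{
    std::vector<int> out;
    for (size_t i = 0; i < num_rows; ++i)
    {
        int prev = out.empty() ? last_row.value_or(0)   // first row: previous chunk's last row (or default)
                               : out.back();            // otherwise: previous row of this chunk
        out.push_back(prev + 1);
    }
    if (!out.empty())
        last_row = out.back();                          // remember the last produced row
    return out;
}

int main()
{
    std::optional<int> last_row;                        // survives across chunk boundaries
    for (size_t chunk_rows : {size_t(3), size_t(2)})
        for (int v : processChunk(chunk_rows, last_row))
            std::cout << v << ' ';                      // prints: 1 2 3 4 5
    std::cout << '\n';
}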

View File

@ -1,6 +1,7 @@
#pragma once
#include <Processors/ISimpleTransform.h>
#include <Core/SortDescription.h>
#include <Core/InterpolateDescription.h>
#include <Interpreters/FillingRow.h>
namespace DB
@ -13,7 +14,7 @@ namespace DB
class FillingTransform : public ISimpleTransform
{
public:
FillingTransform(const Block & header_, const SortDescription & sort_description_, bool on_totals_);
FillingTransform(const Block & header_, const SortDescription & sort_description_, InterpolateDescriptionPtr interpolate_description_, bool on_totals_);
String getName() const override { return "FillingTransform"; }
@ -25,9 +26,11 @@ protected:
void transform(Chunk & Chunk) override;
private:
void setResultColumns(Chunk & chunk, MutableColumns & fill_columns, MutableColumns & other_columns) const;
void setResultColumns(Chunk & chunk, MutableColumns & fill_columns, MutableColumns & interpolate_columns, MutableColumns & other_columns) const;
void saveLastRow(const MutableColumns & fill_columns, const MutableColumns & interpolate_columns, const MutableColumns & other_columns);
const SortDescription sort_description; /// Contains only rows with WITH FILL.
const SortDescription sort_description; /// Contains only columns with WITH FILL.
const InterpolateDescriptionPtr interpolate_description; /// Contains INTERPOLATE columns
const bool on_totals; /// FillingTransform does nothing on totals.
FillingRow filling_row; /// Current row, which is used to fill gaps.
@ -35,10 +38,15 @@ private:
using Positions = std::vector<size_t>;
Positions fill_column_positions;
Positions interpolate_column_positions;
Positions other_column_positions;
std::vector<std::pair<size_t, NameAndTypePair>> input_positions; /// positions in result columns required for actions
ExpressionActionsPtr interpolate_actions;
bool first = true;
bool generate_suffix = false;
Columns last_row;
/// Determines whether we should insert the filling row before starting to generate the next rows.
bool should_insert_first = false;
};

View File

@ -728,6 +728,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
return;
sendData({});
last_sent_snapshots.clear();
}
sendProgress();

View File

@ -30,7 +30,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
HDFSBuilderWrapper builder;
HDFSFSPtr fs;
off_t offset = 0;
off_t file_offset = 0;
off_t read_until_position = 0;
explicit ReadBufferFromHDFSImpl(
@ -71,13 +71,13 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
size_t num_bytes_to_read;
if (read_until_position)
{
if (read_until_position == offset)
if (read_until_position == file_offset)
return false;
if (read_until_position < offset)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1);
if (read_until_position < file_offset)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", file_offset, read_until_position - 1);
num_bytes_to_read = read_until_position - offset;
num_bytes_to_read = read_until_position - file_offset;
}
else
{
@ -94,28 +94,28 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory<S
{
working_buffer = internal_buffer;
working_buffer.resize(bytes_read);
offset += bytes_read;
file_offset += bytes_read;
return true;
}
return false;
}
off_t seek(off_t offset_, int whence) override
off_t seek(off_t file_offset_, int whence) override
{
if (whence != SEEK_SET)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Only SEEK_SET is supported");
offset = offset_;
int seek_status = hdfsSeek(fs.get(), fin, offset);
file_offset = file_offset_;
int seek_status = hdfsSeek(fs.get(), fin, file_offset);
if (seek_status != 0)
throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Fail to seek HDFS file: {}, error: {}", hdfs_uri, std::string(hdfsGetLastError()));
return offset;
return file_offset;
}
off_t getPosition() override
{
return offset;
return file_offset;
}
};
@ -140,7 +140,7 @@ bool ReadBufferFromHDFS::nextImpl()
auto result = impl->next();
if (result)
BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset); /// use the buffer returned by `impl`
BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl`
return result;
}
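
The rename separates two different offsets: the member now called file_offset is the absolute position reached in the HDFS file, while BufferBase's offset() is the cursor inside the current working buffer, which the corrected BufferBase::set call passes explicitly. A small sketch of the distinction with toy fields:

#include <cassert>
#include <cstddef>

// Toy reader: file_offset is the absolute position in the file reached so far,
// while buffer_offset is the cursor inside the buffer that was just filled;
// conflating the two is what the rename guards against.
struct ToyReader
{
    size_t file_offset = 0;     // advances by bytes_read on every successful read
    size_t buffer_offset = 0;   // where the consumer starts inside the current buffer

    void onRead(size_t bytes_read)
    {
        file_offset += bytes_read;
        buffer_offset = 0;      // a fresh buffer is consumed from its beginning
    }

    // "Have we reached the point we were asked to read until?"
    bool reachedReadUntil(size_t read_until_position) const
    {
        return read_until_position == file_offset;
    }
};

int main()
{
    ToyReader reader;
    reader.onRead(128);
    reader.onRead(128);
    assert(reader.file_offset == 256);      // absolute progress through the file
    assert(reader.buffer_offset == 0);      // unrelated to the file position
    assert(reader.reachedReadUntil(256));
}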

View File

@ -93,7 +93,7 @@ Pipe StorageHDFSCluster::read(
/// So, task_identifier is passed as constructor argument. It is more obvious.
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
connection,
queryToString(query_info.query),
queryToString(query_info.original_query),
header,
context,
/*throttler=*/nullptr,

View File

@ -29,31 +29,6 @@ ThriftHiveMetastoreClientPool::ThriftHiveMetastoreClientPool(ThriftHiveMetastore
{
}
bool HiveMetastoreClient::shouldUpdateTableMetadata(
const String & db_name, const String & table_name, const std::vector<Apache::Hadoop::Hive::Partition> & partitions)
{
String cache_key = getCacheKey(db_name, table_name);
HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key);
if (!metadata)
return true;
auto old_partiton_infos = metadata->getPartitionInfos();
if (old_partiton_infos.size() != partitions.size())
return true;
for (const auto & partition : partitions)
{
auto it = old_partiton_infos.find(partition.sd.location);
if (it == old_partiton_infos.end())
return true;
const auto & old_partition_info = it->second;
if (!old_partition_info.haveSameParameters(partition))
return true;
}
return false;
}
void HiveMetastoreClient::tryCallHiveClient(std::function<void(ThriftHiveMetastoreClientPool::Entry &)> func)
{
int i = 0;
@ -91,44 +66,17 @@ HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata(
};
tryCallHiveClient(client_call);
bool update_cache = shouldUpdateTableMetadata(db_name, table_name, partitions);
// bool update_cache = shouldUpdateTableMetadata(db_name, table_name, partitions);
String cache_key = getCacheKey(db_name, table_name);
HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key);
if (update_cache)
if (metadata)
{
LOG_INFO(log, "Reload hive partition metadata info for {}.{}", db_name, table_name);
/// Generate partition infos from partitions and old partition infos(if exists).
std::map<String, PartitionInfo> new_partition_infos;
if (metadata)
{
auto & old_partiton_infos = metadata->getPartitionInfos();
for (const auto & partition : partitions)
{
auto it = old_partiton_infos.find(partition.sd.location);
if (it == old_partiton_infos.end() || !it->second.haveSameParameters(partition) || !it->second.initialized)
{
new_partition_infos.emplace(partition.sd.location, PartitionInfo(partition));
continue;
}
else
{
PartitionInfo new_partition_info(partition);
new_partition_info.files = std::move(it->second.files);
new_partition_info.initialized = true;
}
}
}
else
{
for (const auto & partition : partitions)
new_partition_infos.emplace(partition.sd.location, PartitionInfo(partition));
}
metadata = std::make_shared<HiveMetastoreClient::HiveTableMetadata>(
db_name, table_name, table, std::move(new_partition_infos), getContext());
metadata->updateIfNeeded(partitions);
}
else
{
metadata = std::make_shared<HiveTableMetadata>(db_name, table_name, table, partitions);
table_metadata_cache.set(cache_key, metadata);
}
return metadata;
@ -157,14 +105,14 @@ void HiveMetastoreClient::clearTableMetadata(const String & db_name, const Strin
bool HiveMetastoreClient::PartitionInfo::haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const
{
/// Parameters include keys: numRows, numFiles, rawDataSize, totalSize, transient_lastDdlTime
auto it1 = partition.parameters.cbegin();
auto it2 = other.parameters.cbegin();
for (; it1 != partition.parameters.cend() && it2 != other.parameters.cend(); ++it1, ++it2)
auto it = partition.parameters.cbegin();
auto oit = other.parameters.cbegin();
for (; it != partition.parameters.cend() && oit != other.parameters.cend(); ++it, ++oit)
{
if (it1->first != it2->first || it1->second != it2->second)
if (it->first != oit->first || it->second != oit->second)
return false;
}
return (it1 == partition.parameters.cend() && it2 == other.parameters.cend());
return (it == partition.parameters.cend() && oit == other.parameters.cend());
}
std::vector<Apache::Hadoop::Hive::Partition> HiveMetastoreClient::HiveTableMetadata::getPartitions() const
@ -172,6 +120,7 @@ std::vector<Apache::Hadoop::Hive::Partition> HiveMetastoreClient::HiveTableMetad
std::vector<Apache::Hadoop::Hive::Partition> result;
std::lock_guard lock{mutex};
result.reserve(partition_infos.size());
for (const auto & partition_info : partition_infos)
result.emplace_back(partition_info.second.partition);
return result;
@ -220,6 +169,57 @@ std::vector<HiveMetastoreClient::FileInfo> HiveMetastoreClient::HiveTableMetadat
return result;
}
HiveFilesCachePtr HiveMetastoreClient::HiveTableMetadata::getHiveFilesCache() const
{
return hive_files_cache;
}
void HiveMetastoreClient::HiveTableMetadata::updateIfNeeded(const std::vector<Apache::Hadoop::Hive::Partition> & partitions)
{
std::lock_guard lock{mutex};
if (!shouldUpdate(partitions))
return;
std::map<String, PartitionInfo> new_partition_infos;
auto & old_partiton_infos = partition_infos;
for (const auto & partition : partitions)
{
auto it = old_partiton_infos.find(partition.sd.location);
if (it == old_partiton_infos.end() || !it->second.haveSameParameters(partition) || !it->second.initialized)
{
new_partition_infos.emplace(partition.sd.location, PartitionInfo(partition));
continue;
}
else
{
new_partition_infos.emplace(partition.sd.location, std::move(it->second));
}
}
partition_infos.swap(new_partition_infos);
}
bool HiveMetastoreClient::HiveTableMetadata::shouldUpdate(const std::vector<Apache::Hadoop::Hive::Partition> & partitions)
{
const auto & old_partiton_infos = partition_infos;
if (old_partiton_infos.size() != partitions.size())
return true;
for (const auto & partition : partitions)
{
auto it = old_partiton_infos.find(partition.sd.location);
if (it == old_partiton_infos.end())
return true;
const auto & old_partition_info = it->second;
if (!old_partition_info.haveSameParameters(partition))
return true;
}
return false;
}
HiveMetastoreClientFactory & HiveMetastoreClientFactory::instance()
{
static HiveMetastoreClientFactory factory;
@ -231,9 +231,8 @@ using namespace apache::thrift::protocol;
using namespace apache::thrift::transport;
using namespace Apache::Hadoop::Hive;
HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & name, ContextPtr context)
HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & name)
{
std::lock_guard lock(mutex);
auto it = clients.find(name);
if (it == clients.end())
@ -242,12 +241,13 @@ HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & na
{
return createThriftHiveMetastoreClient(name);
};
auto client = std::make_shared<HiveMetastoreClient>(builder, context->getGlobalContext());
clients[name] = client;
auto client = std::make_shared<HiveMetastoreClient>(builder);
clients.emplace(name, client);
return client;
}
return it->second;
}
std::shared_ptr<ThriftHiveMetastoreClient> HiveMetastoreClientFactory::createThriftHiveMetastoreClient(const String &name)
{
Poco::URI hive_metastore_url(name);
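
The cached HiveTableMetadata now refreshes itself: shouldUpdate compares the cached partitions against a fresh listing, and updateIfNeeded rebuilds the map while reusing entries whose parameters are unchanged, so their already-listed files are kept. A simplified standalone sketch of that reuse-or-replace pass, with toy types in place of the Thrift structures:

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Toy partition as returned by a metastore listing.
struct Partition { std::string location; std::string parameters; };

// Cached per-partition state; `files` is the expensive part worth keeping.
struct PartitionInfo { std::string parameters; std::vector<std::string> files; };

void updateIfNeeded(std::map<std::string, PartitionInfo> & cache, const std::vector<Partition> & partitions)
{
    std::map<std::string, PartitionInfo> fresh;
    for (const auto & partition : partitions)
    {
        auto it = cache.find(partition.location);
        if (it != cache.end() && it->second.parameters == partition.parameters)
            fresh.emplace(partition.location, std::move(it->second));                    // unchanged: keep listed files
        else
            fresh.emplace(partition.location, PartitionInfo{partition.parameters, {}});  // changed or new: relist later
    }
    cache.swap(fresh);   // partitions that disappeared are dropped implicitly
}

int main()
{
    std::map<std::string, PartitionInfo> cache;
    cache.emplace("/p1", PartitionInfo{"v1", {"a.parquet"}});

    updateIfNeeded(cache, {{"/p1", "v1"}, {"/p2", "v1"}});
    std::cout << cache.at("/p1").files.size() << ' ' << cache.at("/p2").files.size() << '\n';   // prints "1 0"
}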

View File

@ -13,6 +13,7 @@
#include <Common/LRUCache.h>
#include <Common/PoolBase.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/Hive/HiveFile.h>
namespace DB
@ -37,10 +38,9 @@ protected:
private:
ThriftHiveMetastoreClientBuilder builder;
};
class HiveMetastoreClient : public WithContext
class HiveMetastoreClient
{
public:
struct FileInfo
{
String path;
@ -63,68 +63,68 @@ public:
bool initialized = false; /// If true, files are initialized.
explicit PartitionInfo(const Apache::Hadoop::Hive::Partition & partition_): partition(partition_) {}
PartitionInfo(PartitionInfo &&) = default;
bool haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const;
};
class HiveTableMetadata;
using HiveTableMetadataPtr = std::shared_ptr<HiveTableMetadata>;
/// Used for speeding up metadata query process.
struct HiveTableMetadata : public WithContext
class HiveTableMetadata : boost::noncopyable
{
public:
HiveTableMetadata(
const String & db_name_,
const String & table_name_,
std::shared_ptr<Apache::Hadoop::Hive::Table> table_,
const std::map<String, PartitionInfo> & partition_infos_,
ContextPtr context_)
: WithContext(context_)
, db_name(db_name_)
const std::vector<Apache::Hadoop::Hive::Partition> & partitions_)
: db_name(db_name_)
, table_name(table_name_)
, table(table_)
, partition_infos(partition_infos_)
, table(std::move(table_))
, empty_partition_keys(table->partitionKeys.empty())
, hive_files_cache(std::make_shared<HiveFilesCache>(10000))
{
std::lock_guard lock(mutex);
for (const auto & partition : partitions_)
partition_infos.emplace(partition.sd.location, PartitionInfo(partition));
}
std::map<String, PartitionInfo> & getPartitionInfos()
{
std::lock_guard lock{mutex};
return partition_infos;
}
std::shared_ptr<Apache::Hadoop::Hive::Table> getTable() const
{
std::lock_guard lock{mutex};
return table;
}
std::shared_ptr<Apache::Hadoop::Hive::Table> getTable() const { return table; }
std::vector<Apache::Hadoop::Hive::Partition> getPartitions() const;
std::vector<FileInfo> getFilesByLocation(const HDFSFSPtr & fs, const String & location);
private:
String db_name;
String table_name;
HiveFilesCachePtr getHiveFilesCache() const;
void updateIfNeeded(const std::vector<Apache::Hadoop::Hive::Partition> & partitions);
private:
bool shouldUpdate(const std::vector<Apache::Hadoop::Hive::Partition> & partitions);
const String db_name;
const String table_name;
const std::shared_ptr<Apache::Hadoop::Hive::Table> table;
/// Mutex to protect partition_infos.
mutable std::mutex mutex;
std::shared_ptr<Apache::Hadoop::Hive::Table> table;
std::map<String, PartitionInfo> partition_infos;
const bool empty_partition_keys;
const HiveFilesCachePtr hive_files_cache;
Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient");
};
using HiveTableMetadataPtr = std::shared_ptr<HiveMetastoreClient::HiveTableMetadata>;
explicit HiveMetastoreClient(ThriftHiveMetastoreClientBuilder builder_, ContextPtr context_)
: WithContext(context_)
, table_metadata_cache(1000)
explicit HiveMetastoreClient(ThriftHiveMetastoreClientBuilder builder_)
: table_metadata_cache(1000)
, client_pool(builder_)
{
}
HiveTableMetadataPtr getTableMetadata(const String & db_name, const String & table_name);
// Access hive table information by hive client
std::shared_ptr<Apache::Hadoop::Hive::Table> getHiveTable(const String & db_name, const String & table_name);
@ -133,9 +133,6 @@ public:
private:
static String getCacheKey(const String & db_name, const String & table_name) { return db_name + "." + table_name; }
bool shouldUpdateTableMetadata(
const String & db_name, const String & table_name, const std::vector<Apache::Hadoop::Hive::Partition> & partitions);
void tryCallHiveClient(std::function<void(ThriftHiveMetastoreClientPool::Entry &)> func);
LRUCache<String, HiveTableMetadata> table_metadata_cache;
@ -150,11 +147,11 @@ class HiveMetastoreClientFactory final : private boost::noncopyable
public:
static HiveMetastoreClientFactory & instance();
HiveMetastoreClientPtr getOrCreate(const String & name, ContextPtr context);
static std::shared_ptr<Apache::Hadoop::Hive::ThriftHiveMetastoreClient> createThriftHiveMetastoreClient(const String & name);
HiveMetastoreClientPtr getOrCreate(const String & name);
private:
static std::shared_ptr<Apache::Hadoop::Hive::ThriftHiveMetastoreClient> createThriftHiveMetastoreClient(const String & name);
std::mutex mutex;
std::map<String, HiveMetastoreClientPtr> clients;
};

View File

@ -77,6 +77,29 @@ Range createRangeFromParquetStatistics(std::shared_ptr<parquet::ByteArrayStatist
return Range(min_val, true, max_val, true);
}
std::optional<size_t> IHiveFile::getRows()
{
if (!rows)
rows = getRowsImpl();
return rows;
}
void IHiveFile::loadFileMinMaxIndex()
{
if (file_minmax_idx_loaded)
return;
loadFileMinMaxIndexImpl();
file_minmax_idx_loaded = true;
}
void IHiveFile::loadSplitMinMaxIndexes()
{
if (split_minmax_idxes_loaded)
return;
loadSplitMinMaxIndexesImpl();
split_minmax_idxes_loaded = true;
}
Range HiveORCFile::buildRange(const orc::ColumnStatistics * col_stats)
{
if (!col_stats || col_stats->hasNull())
@ -183,8 +206,7 @@ std::unique_ptr<IMergeTreeDataPart::MinMaxIndex> HiveORCFile::buildMinMaxIndex(c
return idx;
}
void HiveORCFile::loadFileMinMaxIndex()
void HiveORCFile::loadFileMinMaxIndexImpl()
{
if (!reader)
{
@ -202,7 +224,7 @@ bool HiveORCFile::useSplitMinMaxIndex() const
}
void HiveORCFile::loadSplitMinMaxIndex()
void HiveORCFile::loadSplitMinMaxIndexesImpl()
{
if (!reader)
{
@ -226,6 +248,18 @@ void HiveORCFile::loadSplitMinMaxIndex()
}
}
std::optional<size_t> HiveORCFile::getRowsImpl()
{
if (!reader)
{
prepareReader();
prepareColumnMapping();
}
auto * raw_reader = reader->GetRawORCReader();
return raw_reader->getNumberOfRows();
}
bool HiveParquetFile::useSplitMinMaxIndex() const
{
return storage_settings->enable_parquet_rowgroup_minmax_index;
@ -239,7 +273,7 @@ void HiveParquetFile::prepareReader()
THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings, is_stopped), arrow::default_memory_pool(), &reader));
}
void HiveParquetFile::loadSplitMinMaxIndex()
void HiveParquetFile::loadSplitMinMaxIndexesImpl()
{
if (!reader)
prepareReader();
@ -312,5 +346,14 @@ void HiveParquetFile::loadSplitMinMaxIndex()
}
}
std::optional<size_t> HiveParquetFile::getRowsImpl()
{
if (!reader)
prepareReader();
auto meta = reader->parquet_reader()->metadata();
return meta->num_rows();
}
}
#endif
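
getRows, loadFileMinMaxIndex and loadSplitMinMaxIndexes are now thin wrappers that run the corresponding *Impl at most once and cache the result on the file object. A minimal single-threaded sketch of that lazy pattern (the real class additionally keeps atomic flags):

#include <iostream>
#include <optional>

struct ToyHiveFile
{
    int impl_calls = 0;           // only to show that the impl runs once

    // Compute on first use, then return the cached value (mirrors IHiveFile::getRows).
    std::optional<size_t> getRows()
    {
        if (!rows)
            rows = getRowsImpl();
        return rows;
    }

private:
    // Stand-in for reading the row count from the ORC/Parquet file footer.
    size_t getRowsImpl()
    {
        ++impl_calls;
        return 42;
    }

    std::optional<size_t> rows;   // empty until the first getRows() call
};

int main()
{
    ToyHiveFile file;
    file.getRows();
    file.getRows();
    std::cout << *file.getRows() << ' ' << file.impl_calls << '\n';   // prints "42 1"
}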

View File

@ -83,7 +83,7 @@ public:
size_t size_,
const NamesAndTypesList & index_names_and_types_,
const std::shared_ptr<HiveSettings> & storage_settings_,
ContextPtr context_)
const ContextPtr & context_)
: WithContext(context_)
, partition_values(partition_values_)
, namenode_url(namenode_url_)
@ -100,6 +100,7 @@ public:
const String & getPath() const { return path; }
UInt64 getLastModTs() const { return last_modify_time; }
size_t getSize() const { return size; }
std::optional<size_t> getRows();
const FieldVector & getPartitionValues() const { return partition_values; }
const String & getNamenodeUrl() { return namenode_url; }
MinMaxIndexPtr getMinMaxIndex() const { return file_minmax_idx; }
@ -112,7 +113,6 @@ public:
{
if (!idx)
return "";
std::vector<String> strs;
strs.reserve(index_names_and_types.size());
size_t i = 0;
@ -123,30 +123,42 @@ public:
virtual FileFormat getFormat() const = 0;
/// Whether the hive query can use the file-level minmax index.
virtual bool useFileMinMaxIndex() const { return false; }
void loadFileMinMaxIndex();
virtual void loadFileMinMaxIndex()
{
throw Exception("Method loadFileMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
}
/// If hive query could use contains sub-file level minmax index?
/// If hive query could use sub-file level minmax index?
virtual bool useSplitMinMaxIndex() const { return false; }
virtual void loadSplitMinMaxIndex()
{
throw Exception("Method loadSplitMinMaxIndex is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
}
void loadSplitMinMaxIndexes();
protected:
virtual void loadFileMinMaxIndexImpl()
{
throw Exception("Method loadFileMinMaxIndexImpl is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual void loadSplitMinMaxIndexesImpl()
{
throw Exception("Method loadSplitMinMaxIndexesImpl is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED);
}
virtual std::optional<size_t> getRowsImpl() = 0;
FieldVector partition_values;
String namenode_url;
String path;
UInt64 last_modify_time;
size_t size;
std::optional<size_t> rows;
NamesAndTypesList index_names_and_types;
MinMaxIndexPtr file_minmax_idx;
std::atomic<bool> file_minmax_idx_loaded{false};
std::vector<MinMaxIndexPtr> split_minmax_idxes;
std::atomic<bool> split_minmax_idxes_loaded{false};
/// Skip splits for this file after applying minmax index (if any)
std::unordered_set<int> skip_splits;
std::shared_ptr<HiveSettings> storage_settings;
@ -154,6 +166,8 @@ protected:
using HiveFilePtr = std::shared_ptr<IHiveFile>;
using HiveFiles = std::vector<HiveFilePtr>;
using HiveFilesCache = LRUCache<String, IHiveFile>;
using HiveFilesCachePtr = std::shared_ptr<HiveFilesCache>;
class HiveTextFile : public IHiveFile
{
@ -166,12 +180,15 @@ public:
size_t size_,
const NamesAndTypesList & index_names_and_types_,
const std::shared_ptr<HiveSettings> & hive_settings_,
ContextPtr context_)
const ContextPtr & context_)
: IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
{
}
virtual FileFormat getFormat() const override { return FileFormat::TEXT; }
FileFormat getFormat() const override { return FileFormat::TEXT; }
private:
std::optional<size_t> getRowsImpl() override { return {}; }
};
class HiveORCFile : public IHiveFile
@ -185,25 +202,26 @@ public:
size_t size_,
const NamesAndTypesList & index_names_and_types_,
const std::shared_ptr<HiveSettings> & hive_settings_,
ContextPtr context_)
const ContextPtr & context_)
: IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
{
}
FileFormat getFormat() const override { return FileFormat::ORC; }
bool useFileMinMaxIndex() const override;
void loadFileMinMaxIndex() override;
bool useSplitMinMaxIndex() const override;
void loadSplitMinMaxIndex() override;
private:
static Range buildRange(const orc::ColumnStatistics * col_stats);
void loadFileMinMaxIndexImpl() override;
void loadSplitMinMaxIndexesImpl() override;
std::unique_ptr<MinMaxIndex> buildMinMaxIndex(const orc::Statistics * statistics);
void prepareReader();
void prepareColumnMapping();
std::optional<size_t> getRowsImpl() override;
std::unique_ptr<ReadBufferFromHDFS> in;
std::unique_ptr<arrow::adapters::orc::ORCFileReader> reader;
std::map<String, size_t> orc_column_positions;
@ -220,17 +238,17 @@ public:
size_t size_,
const NamesAndTypesList & index_names_and_types_,
const std::shared_ptr<HiveSettings> & hive_settings_,
ContextPtr context_)
const ContextPtr & context_)
: IHiveFile(partition_values_, namenode_url_, path_, last_modify_time_, size_, index_names_and_types_, hive_settings_, context_)
{
}
FileFormat getFormat() const override { return FileFormat::PARQUET; }
bool useSplitMinMaxIndex() const override;
void loadSplitMinMaxIndex() override;
private:
void loadSplitMinMaxIndexesImpl() override;
std::optional<size_t> getRowsImpl() override;
void prepareReader();
std::unique_ptr<ReadBufferFromHDFS> in;

View File

@ -44,6 +44,7 @@ namespace ErrorCodes
extern const int INVALID_PARTITION_VALUE;
extern const int BAD_ARGUMENTS;
extern const int CANNOT_OPEN_FILE;
extern const int LOGICAL_ERROR;
}
@ -60,7 +61,7 @@ public:
struct SourcesInfo
{
HiveMetastoreClientPtr hive_metastore_client;
std::string database;
std::string database_name;
std::string table_name;
HiveFiles hive_files;
NamesAndTypesList partition_name_types;
@ -169,7 +170,7 @@ public:
{
if (e.code() == ErrorCodes::CANNOT_OPEN_FILE)
{
source_info->hive_metastore_client->clearTableMetadata(source_info->database, source_info->table_name);
source_info->hive_metastore_client->clearTableMetadata(source_info->database_name, source_info->table_name);
throw;
}
}
@ -307,6 +308,8 @@ StorageHive::StorageHive(
storage_metadata.setColumns(columns_);
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment_);
storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext());
setInMemoryMetadata(storage_metadata);
}
@ -316,7 +319,7 @@ void StorageHive::lazyInitialize()
if (has_initialized)
return;
auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext());
auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url);
auto hive_table_metadata = hive_metastore_client->getHiveTable(hive_database, hive_table);
hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->sd.location);
@ -412,7 +415,7 @@ ASTPtr StorageHive::extractKeyExpressionList(const ASTPtr & node)
}
HiveFilePtr createHiveFile(
static HiveFilePtr createHiveFile(
const String & format_name,
const FieldVector & fields,
const String & namenode_url,
@ -421,7 +424,7 @@ HiveFilePtr createHiveFile(
size_t size,
const NamesAndTypesList & index_names_and_types,
const std::shared_ptr<HiveSettings> & hive_settings,
ContextPtr context)
const ContextPtr & context)
{
HiveFilePtr hive_file;
if (format_name == "HiveText")
@ -443,24 +446,26 @@ HiveFilePtr createHiveFile(
return hive_file;
}
std::vector<HiveFilePtr> StorageHive::collectHiveFilesFromPartition(
HiveFiles StorageHive::collectHiveFilesFromPartition(
const Apache::Hadoop::Hive::Partition & partition,
SelectQueryInfo & query_info,
HiveTableMetadataPtr hive_table_metadata,
const SelectQueryInfo & query_info,
const HiveTableMetadataPtr & hive_table_metadata,
const HDFSFSPtr & fs,
ContextPtr context_)
const ContextPtr & context_,
PruneLevel prune_level) const
{
LOG_DEBUG(log, "Collect hive files from partition {}", boost::join(partition.values, ","));
LOG_DEBUG(
log, "Collect hive files from partition {}, prune_level:{}", boost::join(partition.values, ","), pruneLevelToString(prune_level));
/// Skip partition "__HIVE_DEFAULT_PARTITION__"
bool has_default_partition = false;
for (const auto & value : partition.values)
{
if (value == "__HIVE_DEFAULT_PARTITION__")
{
has_default_partition = true;
break;
}
/// Skip partition "__HIVE_DEFAULT_PARTITION__"
bool has_default_partition = false;
for (const auto & value : partition.values)
{
if (value == "__HIVE_DEFAULT_PARTITION__")
{
has_default_partition = true;
break;
}
}
if (has_default_partition)
return {};
@ -490,95 +495,133 @@ std::vector<HiveFilePtr> StorageHive::collectHiveFilesFromPartition(
if (!reader->pull(block) || !block.rows())
throw Exception("Could not parse partition value: " + wb.str(), ErrorCodes::INVALID_PARTITION_VALUE);
std::vector<Range> ranges;
ranges.reserve(partition_names.size());
/// Get partition values
FieldVector fields(partition_names.size());
for (size_t i = 0; i < partition_names.size(); ++i)
{
block.getByPosition(i).column->get(0, fields[i]);
ranges.emplace_back(fields[i]);
if (prune_level >= PruneLevel::Partition)
{
std::vector<Range> ranges;
ranges.reserve(partition_names.size());
for (size_t i = 0; i < partition_names.size(); ++i)
ranges.emplace_back(fields[i]);
const KeyCondition partition_key_condition(query_info, getContext(), partition_names, partition_minmax_idx_expr);
if (!partition_key_condition.checkInHyperrectangle(ranges, partition_types).can_be_true)
return {};
}
const KeyCondition partition_key_condition(query_info, getContext(), partition_names, partition_minmax_idx_expr);
if (!partition_key_condition.checkInHyperrectangle(ranges, partition_types).can_be_true)
return {};
HiveFiles hive_files;
auto file_infos = listDirectory(partition.sd.location, hive_table_metadata, fs);
std::vector<HiveFilePtr> hive_files;
hive_files.reserve(file_infos.size());
for (const auto & file_info : file_infos)
{
auto hive_file = createHiveFileIfNeeded(file_info, fields, query_info, context_);
auto hive_file = getHiveFileIfNeeded(file_info, fields, query_info, hive_table_metadata, context_, prune_level);
if (hive_file)
{
LOG_TRACE(
log,
"Append hive file {} from partition {}, prune_level:{}",
hive_file->getPath(),
boost::join(partition.values, ","),
pruneLevelToString(prune_level));
hive_files.push_back(hive_file);
}
}
return hive_files;
}
std::vector<StorageHive::FileInfo>
StorageHive::listDirectory(const String & path, HiveTableMetadataPtr hive_table_metadata, const HDFSFSPtr & fs)
StorageHive::listDirectory(const String & path, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs)
{
return hive_table_metadata->getFilesByLocation(fs, path);
}
HiveFilePtr StorageHive::createHiveFileIfNeeded(
const FileInfo & file_info, const FieldVector & fields, SelectQueryInfo & query_info, ContextPtr context_)
HiveFilePtr StorageHive::getHiveFileIfNeeded(
const FileInfo & file_info,
const FieldVector & fields,
const SelectQueryInfo & query_info,
const HiveTableMetadataPtr & hive_table_metadata,
const ContextPtr & context_,
PruneLevel prune_level) const
{
LOG_TRACE(log, "Append hive file {}", file_info.path);
String filename = getBaseName(file_info.path);
/// Skip temporary files starting with '.'
if (filename.find('.') == 0)
if (startsWith(filename, "."))
return {};
auto hive_file = createHiveFile(
format_name,
fields,
hdfs_namenode_url,
file_info.path,
file_info.last_modify_time,
file_info.size,
hivefile_name_types,
storage_settings,
context_);
/// Load file level minmax index and apply
const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr);
if (hive_file->useFileMinMaxIndex())
auto cache = hive_table_metadata->getHiveFilesCache();
auto hive_file = cache->get(file_info.path);
if (!hive_file || hive_file->getLastModTs() < file_info.last_modify_time)
{
hive_file->loadFileMinMaxIndex();
if (!hivefile_key_condition.checkInHyperrectangle(hive_file->getMinMaxIndex()->hyperrectangle, hivefile_name_types.getTypes())
.can_be_true)
{
LOG_TRACE(
log, "Skip hive file {} by index {}", hive_file->getPath(), hive_file->describeMinMaxIndex(hive_file->getMinMaxIndex()));
return {};
}
LOG_TRACE(log, "Create hive file {}, prune_level {}", file_info.path, pruneLevelToString(prune_level));
hive_file = createHiveFile(
format_name,
fields,
hdfs_namenode_url,
file_info.path,
file_info.last_modify_time,
file_info.size,
hivefile_name_types,
storage_settings,
context_->getGlobalContext());
cache->set(file_info.path, hive_file);
}
else
{
LOG_TRACE(log, "Get hive file {} from cache, prune_level {}", file_info.path, pruneLevelToString(prune_level));
}
/// Load sub-file level minmax index and apply
if (hive_file->useSplitMinMaxIndex())
if (prune_level >= PruneLevel::File)
{
std::unordered_set<int> skip_splits;
hive_file->loadSplitMinMaxIndex();
const auto & sub_minmax_idxes = hive_file->getSubMinMaxIndexes();
for (size_t i = 0; i < sub_minmax_idxes.size(); ++i)
const KeyCondition hivefile_key_condition(query_info, getContext(), hivefile_name_types.getNames(), hivefile_minmax_idx_expr);
if (hive_file->useFileMinMaxIndex())
{
if (!hivefile_key_condition.checkInHyperrectangle(sub_minmax_idxes[i]->hyperrectangle, hivefile_name_types.getTypes())
/// Load file level minmax index and apply
hive_file->loadFileMinMaxIndex();
if (!hivefile_key_condition.checkInHyperrectangle(hive_file->getMinMaxIndex()->hyperrectangle, hivefile_name_types.getTypes())
.can_be_true)
{
LOG_TRACE(
log,
"Skip split {} of hive file {} by index {}",
i,
"Skip hive file {} by index {}",
hive_file->getPath(),
hive_file->describeMinMaxIndex(sub_minmax_idxes[i]));
skip_splits.insert(i);
hive_file->describeMinMaxIndex(hive_file->getMinMaxIndex()));
return {};
}
}
if (prune_level >= PruneLevel::Split)
{
if (hive_file->useSplitMinMaxIndex())
{
/// Load sub-file level minmax index and apply
std::unordered_set<int> skip_splits;
hive_file->loadSplitMinMaxIndexes();
const auto & sub_minmax_idxes = hive_file->getSubMinMaxIndexes();
for (size_t i = 0; i < sub_minmax_idxes.size(); ++i)
{
if (!hivefile_key_condition.checkInHyperrectangle(sub_minmax_idxes[i]->hyperrectangle, hivefile_name_types.getTypes())
.can_be_true)
{
LOG_TRACE(
log,
"Skip split {} of hive file {} by index {}",
i,
hive_file->getPath(),
hive_file->describeMinMaxIndex(sub_minmax_idxes[i]));
skip_splits.insert(i);
}
}
hive_file->setSkipSplits(skip_splits);
}
}
hive_file->setSkipSplits(skip_splits);
}
return hive_file;
}
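To make the cache interaction above easier to follow, here is a minimal Python sketch of the same get-or-create-by-modification-time pattern; the cache object and the create callback are hypothetical stand-ins, not ClickHouse APIs:

def get_or_create_hive_file(cache, file_info, create):
    """Reuse the cached descriptor unless the file changed on HDFS since it was cached."""
    hive_file = cache.get(file_info["path"])
    if hive_file is None or hive_file["last_mod_ts"] < file_info["last_modify_time"]:
        hive_file = create(file_info)              # rebuild the descriptor for the new version
        cache.set(file_info["path"], hive_file)    # refresh the cache entry
    return hive_file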
bool StorageHive::isColumnOriented() const
{
return format_name == "Parquet" || format_name == "ORC";
@ -607,6 +650,7 @@ void StorageHive::getActualColumnsToRead(Block & sample_block, const Block & hea
}
}
}
Pipe StorageHive::read(
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
@ -620,55 +664,17 @@ Pipe StorageHive::read(
HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, context_->getGlobalContext()->getConfigRef());
HDFSFSPtr fs = createHDFSFS(builder.get());
auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext());
auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url);
auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table);
std::vector<Apache::Hadoop::Hive::Partition> partitions = hive_table_metadata->getPartitions();
/// Hive files to read
HiveFiles hive_files;
/// Mutex to protect hive_files, which may be appended to from multiple threads
std::mutex hive_files_mutex;
ThreadPool pool{num_streams};
if (!partitions.empty())
{
for (const auto & partition : partitions)
{
pool.scheduleOrThrowOnError([&]()
{
auto hive_files_in_partition = collectHiveFilesFromPartition(partition, query_info, hive_table_metadata, fs, context_);
if (!hive_files_in_partition.empty())
{
std::lock_guard<std::mutex> lock(hive_files_mutex);
hive_files.insert(std::end(hive_files), std::begin(hive_files_in_partition), std::end(hive_files_in_partition));
}
});
}
pool.wait();
}
else if (partition_name_types.empty()) /// Partition keys is empty
{
auto file_infos = listDirectory(hive_table_metadata->getTable()->sd.location, hive_table_metadata, fs);
for (const auto & file_info : file_infos)
{
pool.scheduleOrThrowOnError([&]
{
auto hive_file = createHiveFileIfNeeded(file_info, {}, query_info, context_);
if (hive_file)
{
std::lock_guard<std::mutex> lock(hive_files_mutex);
hive_files.push_back(hive_file);
}
});
}
pool.wait();
}
else /// Partition keys is not empty but partitions is empty
/// Collect Hive files to read
HiveFiles hive_files = collectHiveFiles(num_streams, query_info, hive_table_metadata, fs, context_);
if (hive_files.empty())
return {};
auto sources_info = std::make_shared<StorageHiveSource::SourcesInfo>();
sources_info->hive_files = std::move(hive_files);
sources_info->database = hive_database;
sources_info->database_name = hive_database;
sources_info->table_name = hive_table;
sources_info->hive_metastore_client = hive_metastore_client;
sources_info->partition_name_types = partition_name_types;
@ -705,6 +711,62 @@ Pipe StorageHive::read(
return Pipe::unitePipes(std::move(pipes));
}
HiveFiles StorageHive::collectHiveFiles(
unsigned max_threads,
const SelectQueryInfo & query_info,
const HiveTableMetadataPtr & hive_table_metadata,
const HDFSFSPtr & fs,
const ContextPtr & context_,
PruneLevel prune_level) const
{
std::vector<Apache::Hadoop::Hive::Partition> partitions = hive_table_metadata->getPartitions();
/// Hive table has no partitions
if (!partition_name_types.empty() && partitions.empty())
return {};
/// Hive files to collect
HiveFiles hive_files;
/// Mutex to protect hive_files, which may be appended to from multiple threads
std::mutex hive_files_mutex;
ThreadPool pool{max_threads};
if (!partitions.empty())
{
for (const auto & partition : partitions)
{
pool.scheduleOrThrowOnError(
[&]()
{
auto hive_files_in_partition
= collectHiveFilesFromPartition(partition, query_info, hive_table_metadata, fs, context_, prune_level);
if (!hive_files_in_partition.empty())
{
std::lock_guard<std::mutex> lock(hive_files_mutex);
hive_files.insert(std::end(hive_files), std::begin(hive_files_in_partition), std::end(hive_files_in_partition));
}
});
}
}
else /// Partition keys are empty but there are still files
{
auto file_infos = listDirectory(hive_table_metadata->getTable()->sd.location, hive_table_metadata, fs);
for (const auto & file_info : file_infos)
{
pool.scheduleOrThrowOnError(
[&]()
{
auto hive_file = getHiveFileIfNeeded(file_info, {}, query_info, hive_table_metadata, context_, prune_level);
if (hive_file)
{
std::lock_guard<std::mutex> lock(hive_files_mutex);
hive_files.push_back(hive_file);
}
});
}
}
pool.wait();
return hive_files;
}
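As a rough illustration of the new prune levels (None < Partition < File < Split), the following hypothetical Python sketch applies the same layered checks; can_be_true stands in for the KeyCondition hyperrectangle test and the dictionary fields are assumptions, not ClickHouse code:

from enum import IntEnum

class PruneLevel(IntEnum):
    NONE = 0
    PARTITION = 1
    FILE = 2
    SPLIT = 3

def prune_files(files, can_be_true, prune_level=PruneLevel.SPLIT):
    """Return the files that may still contain matching rows at the requested prune level."""
    kept = []
    for f in files:
        if prune_level >= PruneLevel.PARTITION and not can_be_true(f["partition_minmax"]):
            continue  # the whole partition cannot match
        if prune_level >= PruneLevel.FILE and not can_be_true(f["file_minmax"]):
            continue  # the file-level min/max index rules the file out
        if prune_level >= PruneLevel.SPLIT:
            # remember which splits (row groups / stripes) can be skipped inside the file
            f["skip_splits"] = {i for i, mm in enumerate(f["split_minmax"]) if not can_be_true(mm)}
        kept.append(f)
    return kept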
SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /* metadata_snapshot*/, ContextPtr /*context*/)
{
throw Exception("Method write is not implemented for StorageHive", ErrorCodes::NOT_IMPLEMENTED);
@ -717,6 +779,44 @@ NamesAndTypesList StorageHive::getVirtuals() const
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
}
std::optional<UInt64> StorageHive::totalRows(const Settings & settings) const
{
/// query_info is not used when prune_level == PruneLevel::None
SelectQueryInfo query_info;
return totalRowsImpl(settings, query_info, getContext(), PruneLevel::None);
}
std::optional<UInt64> StorageHive::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const
{
return totalRowsImpl(context_->getSettingsRef(), query_info, context_, PruneLevel::Partition);
}
std::optional<UInt64>
StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const
{
/// Row-based formats like Text don't support totalRowsByPartitionPredicate
if (!isColumnOriented())
return {};
auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url);
auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table);
HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, getContext()->getGlobalContext()->getConfigRef());
HDFSFSPtr fs = createHDFSFS(builder.get());
HiveFiles hive_files = collectHiveFiles(settings.max_threads, query_info, hive_table_metadata, fs, context_, prune_level);
UInt64 total_rows = 0;
for (const auto & hive_file : hive_files)
{
auto file_rows = hive_file->getRows();
if (!file_rows)
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Rows of hive file:{} with format:{} not initialized", hive_file->getPath(), format_name);
total_rows += *file_rows;
}
return total_rows;
}
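The row-count aggregation above is straightforward; a hypothetical Python rendering of the same loop, where the "rows" field stands for row counts read from Parquet/ORC metadata:

def total_rows(hive_files):
    """Sum per-file row counts; fail loudly if any file has no count, as the C++ code does."""
    total = 0
    for hive_file in hive_files:
        rows = hive_file.get("rows")
        if rows is None:
            raise RuntimeError(f"Rows of hive file {hive_file['path']} not initialized")
        total += rows
    return total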
void registerStorageHive(StorageFactory & factory)
{
factory.registerStorage(

View File

@ -26,7 +26,6 @@ class HiveSettings;
class StorageHive final : public shared_ptr_helper<StorageHive>, public IStorage, WithContext
{
friend struct shared_ptr_helper<StorageHive>;
public:
String getName() const override { return "Hive"; }
@ -39,7 +38,6 @@ public:
return true;
}
Pipe read(
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
@ -55,6 +53,9 @@ public:
bool isColumnOriented() const override;
std::optional<UInt64> totalRows(const Settings & settings) const override;
std::optional<UInt64> totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context_) const override;
protected:
friend class StorageHiveSource;
StorageHive(
@ -74,31 +75,64 @@ private:
using FileInfo = HiveMetastoreClient::FileInfo;
using HiveTableMetadataPtr = HiveMetastoreClient::HiveTableMetadataPtr;
enum class PruneLevel
{
None, /// Do not prune
Partition,
File,
Split,
Max = Split,
};
static String pruneLevelToString(PruneLevel level)
{
return String(magic_enum::enum_name(level));
}
static ASTPtr extractKeyExpressionList(const ASTPtr & node);
static std::vector<FileInfo> listDirectory(const String & path, HiveTableMetadataPtr hive_table_metadata, const HDFSFSPtr & fs);
static std::vector<FileInfo> listDirectory(const String & path, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs);
void initMinMaxIndexExpression();
std::vector<HiveFilePtr> collectHiveFilesFromPartition(
const Apache::Hadoop::Hive::Partition & partition,
SelectQueryInfo & query_info,
HiveTableMetadataPtr hive_table_metadata,
HiveFiles collectHiveFiles(
unsigned max_threads,
const SelectQueryInfo & query_info,
const HiveTableMetadataPtr & hive_table_metadata,
const HDFSFSPtr & fs,
ContextPtr context_);
const ContextPtr & context_,
PruneLevel prune_level = PruneLevel::Max) const;
HiveFilePtr
createHiveFileIfNeeded(const FileInfo & file_info, const FieldVector & fields, SelectQueryInfo & query_info, ContextPtr context_);
HiveFiles collectHiveFilesFromPartition(
const Apache::Hadoop::Hive::Partition & partition,
const SelectQueryInfo & query_info,
const HiveTableMetadataPtr & hive_table_metadata,
const HDFSFSPtr & fs,
const ContextPtr & context_,
PruneLevel prune_level = PruneLevel::Max) const;
HiveFilePtr getHiveFileIfNeeded(
const FileInfo & file_info,
const FieldVector & fields,
const SelectQueryInfo & query_info,
const HiveTableMetadataPtr & hive_table_metadata,
const ContextPtr & context_,
PruneLevel prune_level = PruneLevel::Max) const;
void getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const;
void lazyInitialize();
std::optional<UInt64>
totalRowsImpl(const Settings & settings, const SelectQueryInfo & query_info, ContextPtr context_, PruneLevel prune_level) const;
String hive_metastore_url;
/// Hive database and table
String hive_database;
String hive_table;
std::mutex init_mutex;
mutable std::mutex init_mutex;
bool has_initialized = false;
/// Hive table meta
@ -123,9 +157,8 @@ private:
std::shared_ptr<HiveSettings> storage_settings;
Poco::Logger * log = &Poco::Logger::get("StorageHive");
void lazyInitialize();
};
}
#endif

View File

@ -693,9 +693,14 @@ bool StorageKafka::streamToViews()
// We can't cancel during copyData, as it's not aware of commits and other kafka-related stuff.
// It will be cancelled on underlying layer (kafka buffer)
size_t rows = 0;
std::atomic_size_t rows = 0;
{
block_io.pipeline.complete(std::move(pipe));
// we need to read all consumers in parallel (sequential read may lead to a situation
// where some of the consumers are not used, which will break some Kafka consumer invariants)
block_io.pipeline.setNumThreads(stream_count);
block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); });
CompletedPipelineExecutor executor(block_io.pipeline);
executor.execute();
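Because the progress callback now fires from several pipeline threads, the row counter had to become atomic. A hypothetical Python analogue of the same idea, using a lock-protected counter fed by per-thread callbacks:

import threading

class RowCounter:
    """Thread-safe accumulator for rows reported by concurrent progress callbacks."""

    def __init__(self):
        self._rows = 0
        self._lock = threading.Lock()

    def on_progress(self, read_rows):
        with self._lock:
            self._rows += read_rows

    @property
    def rows(self):
        with self._lock:
            return self._rows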

View File

@ -134,7 +134,7 @@ Pipe StorageS3Cluster::read(
/// So, task_identifier is passed as constructor argument. It is more obvious.
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
connection,
queryToString(query_info.query),
queryToString(query_info.original_query),
header,
context,
/*throttler=*/nullptr,

View File

@ -53,6 +53,7 @@ Pipe StorageSystemRemoteDataPaths::read(
{
std::vector<IDisk::LocalPathWithRemotePaths> remote_paths_by_local_path;
disk->getRemotePathsRecursive("store", remote_paths_by_local_path);
disk->getRemotePathsRecursive("data", remote_paths_by_local_path);
FileCachePtr cache;
auto cache_base_path = disk->getCacheBasePath();

View File

@ -4,6 +4,8 @@ import json
import logging
import os
import sys
from typing import Dict, List, Tuple
from github import Github
from env_helper import (
@ -44,7 +46,7 @@ class BuildResult:
self.with_coverage = with_coverage
def group_by_artifacts(build_urls):
def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]:
groups = {
"apk": [],
"deb": [],
@ -52,7 +54,7 @@ def group_by_artifacts(build_urls):
"tgz": [],
"rpm": [],
"performance": [],
}
} # type: Dict[str, List[str]]
for url in build_urls:
if url.endswith("performance.tgz"):
groups["performance"].append(url)
@ -74,7 +76,9 @@ def group_by_artifacts(build_urls):
return groups
def process_report(build_report):
def process_report(
build_report,
) -> Tuple[List[BuildResult], List[List[str]], List[str]]:
build_config = build_report["build_config"]
build_result = BuildResult(
compiler=build_config["compiler"],
@ -98,6 +102,7 @@ def process_report(build_report):
build_logs_urls.append(build_report["log_url"])
found_group = True
# If no group of urls was found, it's a failed report
if not found_group:
build_results.append(build_result)
build_urls.append([""])
@ -110,7 +115,7 @@ def get_build_name_from_file_name(file_name):
return file_name.replace("build_urls_", "").replace(".json", "")
if __name__ == "__main__":
def main():
logging.basicConfig(level=logging.INFO)
reports_path = REPORTS_PATH
temp_path = TEMP_PATH
@ -120,7 +125,7 @@ if __name__ == "__main__":
os.makedirs(temp_path)
build_check_name = sys.argv[1]
reports_length = int(sys.argv[2]) if len(sys.argv) > 2 else 0
required_builds = int(sys.argv[2]) if len(sys.argv) > 2 else 0
gh = Github(get_best_robot_token())
pr_info = PRInfo()
@ -129,19 +134,20 @@ if __name__ == "__main__":
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
reports_order = CI_CONFIG["builds_report_config"][build_check_name]
logging.info("My reports list %s", reports_order)
builds_for_check = CI_CONFIG["builds_report_config"][build_check_name]
logging.info("My reports list %s", builds_for_check)
required_builds = required_builds or len(builds_for_check)
build_reports_map = {}
for root, dirs, files in os.walk(reports_path):
# Collect reports from json artifacts
builds_report_map = {}
for root, _, files in os.walk(reports_path):
for f in files:
if f.startswith("build_urls_") and f.endswith(".json"):
logging.info("Found build report json %s", f)
build_name = get_build_name_from_file_name(f)
if build_name in reports_order:
if build_name in builds_for_check:
with open(os.path.join(root, f), "rb") as file_handler:
build_report = json.load(file_handler)
build_reports_map[build_name] = build_report
builds_report_map[build_name] = json.load(file_handler)
else:
logging.info(
"Skipping report %s for build %s, it's not in our reports list",
@ -149,44 +155,45 @@ if __name__ == "__main__":
build_name,
)
reports_length = reports_length or len(reports_order)
some_builds_are_missing = len(build_reports_map) < reports_length
if some_builds_are_missing:
logging.info(
"Expected to get %s build results, got %s",
len(reports_order),
len(build_reports_map),
)
else:
logging.info("Got exactly %s builds", len(build_reports_map))
# Sort reports by config order
build_reports = [
build_reports_map[build_name]
for build_name in reports_order
if build_name in build_reports_map
builds_report_map[build_name]
for build_name in builds_for_check
if build_name in builds_report_map
]
build_results = []
build_artifacts = []
some_builds_are_missing = len(build_reports) < required_builds
if some_builds_are_missing:
logging.warning(
"Expected to get %s build results, got only %s",
required_builds,
len(build_reports),
)
else:
logging.info("Got exactly %s builds", len(builds_report_map))
# Group build artifacts by groups
build_results = [] # type: List[BuildResult]
build_artifacts = []  # type: List[List[str]]
build_logs = []
for build_report in build_reports:
build_result, build_artifacts_url, build_logs_url = process_report(build_report)
logging.info("Got %s result for report", len(build_result))
build_results += build_result
build_artifacts += build_artifacts_url
build_logs += build_logs_url
logging.info(
"Got %s artifact groups for build report report", len(build_result)
)
build_results.extend(build_result)
build_artifacts.extend(build_artifacts_url)
build_logs.extend(build_logs_url)
logging.info("Totally got %s results", len(build_results))
if len(build_results) == 0:
logging.info("No builds, failing check")
total_groups = len(build_results)
logging.info("Totally got %s artifact groups", total_groups)
if total_groups == 0:
logging.error("No success builds, failing check")
sys.exit(1)
s3_helper = S3Helper("https://s3.amazonaws.com")
pr_info = PRInfo()
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master"
branch_name = "master"
if pr_info.number != 0:
@ -219,9 +226,10 @@ if __name__ == "__main__":
report_path, s3_path_prefix + "/report.html"
)
logging.info("Report url %s", url)
print(f"::notice ::Report url: {url}")
total_builds = len(build_results)
ok_builds = 0
# Prepare a commit status
ok_groups = 0
summary_status = "success"
for build_result in build_results:
if build_result.status == "failure" and summary_status != "error":
@ -230,18 +238,16 @@ if __name__ == "__main__":
summary_status = "error"
if build_result.status == "success":
ok_builds += 1
ok_groups += 1
if ok_builds == 0 or some_builds_are_missing:
if ok_groups == 0 or some_builds_are_missing:
summary_status = "error"
addition = ""
if some_builds_are_missing:
addition = f"({len(build_reports_map)} < {reports_length})"
addition = f"({len(build_reports)} of {required_builds} builds are OK)"
description = f"{ok_builds}/{total_builds} builds are OK {addition}"
print(f"::notice ::Report url: {url}")
description = f"{ok_groups}/{total_groups} artifact groups are OK {addition}"
commit = get_commit(gh, pr_info.sha)
commit.create_status(
@ -253,3 +259,7 @@ if __name__ == "__main__":
if summary_status == "error":
sys.exit(1)
if __name__ == "__main__":
main()
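The commit-status logic in this file reduces to: any errored group, missing reports, or a complete absence of successful groups makes the check an error; any failed group makes it a failure; otherwise it is a success. A condensed, hypothetical sketch of that decision:

def summarize(build_results, some_builds_are_missing):
    """Mirror the summary_status computation: error > failure > success."""
    summary_status = "success"
    ok_groups = 0
    for result in build_results:
        if result.status == "failure" and summary_status != "error":
            summary_status = "failure"
        if result.status == "error":
            summary_status = "error"
        if result.status == "success":
            ok_groups += 1
    if ok_groups == 0 or some_builds_are_missing:
        summary_status = "error"
    return summary_status, ok_groups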

View File

@ -1,13 +1,13 @@
FROM public.ecr.aws/lambda/python:3.9
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Install the function's dependencies using file requirements.txt
# from your project folder.
COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
CMD [ "app.handler" ]

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
from collections import namedtuple
from typing import Any, Dict
from typing import Any, Dict, List
import json
import time
@ -11,7 +11,7 @@ import boto3 # type: ignore
NEED_RERUN_OR_CANCELL_WORKFLOWS = {
"PullRequestCI",
"Docs",
"DocsCheck",
"DocsRelease",
"BackportPR",
}
@ -93,7 +93,9 @@ WorkflowDescription = namedtuple(
def get_workflows_description_for_pull_request(pull_request_event):
head_repo = pull_request_event["head"]["repo"]["full_name"]
head_branch = pull_request_event["head"]["ref"]
head_sha = pull_request_event["head"]["sha"]
print("PR", pull_request_event["number"], "has head ref", head_branch)
workflows_data = []
workflows = _exec_get_with_retry(
@ -111,17 +113,23 @@ def get_workflows_description_for_pull_request(pull_request_event):
print("Too many workflows found")
break
DEBUG_INFO["workflows"] = [] # type: List[Dict[str, str]]
workflow_descriptions = []
for workflow in workflows_data:
DEBUG_INFO["workflow"] = workflow
# Sometimes workflow["head_repository"]["full_name"] is None
if workflow["head_repository"] is None:
continue
DEBUG_INFO["workflows"].append(
{
"full_name": workflow["head_repository"]["full_name"],
"name": workflow["name"],
}
)
# Unfortunately we cannot filter out workflows from forks in the API request,
# so we do it manually
if (
workflow["head_repository"]["full_name"]
== pull_request_event["head"]["repo"]["full_name"]
workflow["head_sha"] == head_sha
and workflow["head_repository"]["full_name"] == head_repo
and workflow["name"] in NEED_RERUN_OR_CANCELL_WORKFLOWS
):
workflow_descriptions.append(
@ -170,7 +178,7 @@ def exec_workflow_url(urls_to_cancel, token):
def main(event):
token = get_token_from_aws()
DEBUG_INFO["event_body"] = event["body"]
event_data = event["body"]
event_data = json.loads(event["body"])
print("Got event for PR", event_data["number"])
action = event_data["action"]
@ -220,7 +228,6 @@ def main(event):
def handler(event, _):
try:
main(event)
except Exception:
finally:
for name, value in DEBUG_INFO.items():
print(f"Value of {name}: ", value)
raise
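The fork filtering now matches on the PR's head SHA as well as the head repository and workflow name. A standalone, hypothetical version of the same predicate:

def matching_workflows(workflows, head_sha, head_repo, allowed_names):
    """Keep only workflow runs that belong to this PR head and are rerun/cancel candidates."""
    kept = []
    for workflow in workflows:
        if workflow.get("head_repository") is None:
            continue  # the API sometimes returns runs without a head repository
        if (
            workflow["head_sha"] == head_sha
            and workflow["head_repository"]["full_name"] == head_repo
            and workflow["name"] in allowed_names
        ):
            kept.append(workflow)
    return kept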

View File

@ -57,10 +57,12 @@ def dowload_file_with_progress(url, path):
def get_ccache_if_not_exists(
path_to_ccache_dir, s3_helper, current_pr_number, temp_path
):
) -> int:
"""returns: number of PR for downloaded PR. -1 if ccache not found"""
ccache_name = os.path.basename(path_to_ccache_dir)
cache_found = False
prs_to_check = [current_pr_number]
ccache_pr = -1
if current_pr_number != 0:
prs_to_check.append(0)
for pr_number in prs_to_check:
@ -87,6 +89,7 @@ def get_ccache_if_not_exists(
decompress_fast(compressed_cache, path_to_decompress)
logging.info("Files on path %s", os.listdir(path_to_decompress))
cache_found = True
ccache_pr = pr_number
break
if cache_found:
break
@ -98,6 +101,8 @@ def get_ccache_if_not_exists(
else:
logging.info("ccache downloaded")
return ccache_pr
def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)

View File

@ -76,4 +76,5 @@ def get_images_with_versions(
def get_image_with_version(reports_path, image, pull=True, version=None):
logging.info("Looking for images file in %s", reports_path)
return get_images_with_versions(reports_path, [image], pull, version=version)[0]

View File

@ -113,7 +113,10 @@ if __name__ == "__main__":
cache_path = os.path.join(caches_path, "fasttest")
logging.info("Will try to fetch cache for our build")
get_ccache_if_not_exists(cache_path, s3_helper, pr_info.number, temp_path)
ccache_for_pr = get_ccache_if_not_exists(
cache_path, s3_helper, pr_info.number, temp_path
)
upload_master_ccache = ccache_for_pr in (-1, 0)
if not os.path.exists(cache_path):
logging.info("cache was not fetched, will create empty dir")
@ -179,6 +182,9 @@ if __name__ == "__main__":
logging.info("Will upload cache")
upload_ccache(cache_path, s3_helper, pr_info.number, temp_path)
if upload_master_ccache:
logging.info("Will upload a fallback cache for master")
upload_ccache(cache_path, s3_helper, 0, temp_path)
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, NAME, test_results)
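Put together, the ccache changes implement a simple fallback policy: if no cache specific to this PR was found (the helper returned -1) or the cache came from master (PR 0), the freshly produced cache is also uploaded under PR 0 so that other branches can reuse it. A condensed sketch reusing the helpers shown above, with the build step elided:

def refresh_ccache(cache_path, s3_helper, pr_number, temp_path):
    # -1 means no ccache was found, 0 means the fallback master cache was used
    ccache_for_pr = get_ccache_if_not_exists(cache_path, s3_helper, pr_number, temp_path)

    # ... run the build here, populating cache_path ...

    upload_ccache(cache_path, s3_helper, pr_number, temp_path)
    if ccache_for_pr in (-1, 0):
        # no PR-specific cache existed, so refresh the shared master cache too
        upload_ccache(cache_path, s3_helper, 0, temp_path)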

View File

@ -1173,6 +1173,9 @@ def check_server_started(args):
retry_count -= 1
sleep(0.5)
continue
except TimeoutError:
print("\nConnection timeout, will not retry")
break
print('\nAll connection tries failed')
sys.stdout.flush()

View File

@ -1,44 +1,49 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEpAIBAAKCAQEAtz2fpa8hyUff8u8jYlh20HbkOO8hQi64Ke2Prack2Br0lhOr
1MI6I8nVk5iDrt+7ix2Cnt+2aZKb6HJv0CG1V25yWg+jgsXeIT1KHTJf8rTmYxhb
t+ye+S1Z0h/Rt+xqSd9XXfzOLPGHYfyx6ZQ4AumO/HoEFD4IH/qiREjwtOfRXuhz
CohqtUTyYR7pJmZqBSuGac461WVRisnjfKRxeVa3itc84/RgktgYej2x4PQBFk13
xAXKrWmHkwdgWklTuuK8Gtoqz65Y4/J9CSl+Bd08QDdRnaVvq1u1eNTZg1BVyeRv
jFYBMSathKASrng5nK66Fdilw6tO/9khaP0SDQIDAQABAoIBAAm/5qGrKtIJ1/mW
Dbzq1g+Lc+MvngZmc/gPIsjrjsNM09y0WT0txGgpEgsTX1ZLoy/otw16+7qsSU1Z
4WcilAJ95umx0VJg8suz9iCNkJtaUrPNFPw5Q9AgQJo0hTUTCCi8EGr4y4OKqlhl
WJYEA+LryGbYmyT0k/wXmtClTOFjKS09mK4deQ1DqbBxayR9MUZgRJzEODA8eGXs
Rc6fJUenMVNMzIVLgpossRtKImoZNcf5UtCKL3HECunndQeMu4zuqLMU+EzL1F/o
iHKF7v3CVmsK0OxNJfOfT0abN3XaJttFwTJyghQjgP8OX1IKjlj3vo9xwEDfVUEf
GVIER0UCgYEA2j+kjaT3Dw2lNlBEsA8uvVlLJHBc9JBtMGduGYsIEgsL/iStqXv4
xoA9N3CwkN/rrQpDfi/16DMXAAYdjGulPwiMNFBY22TCzhtPC2mAnfaSForxwZCs
lwc3KkIloo3N5XvN78AuZf8ewiS+bOEj+HHHqqSb1+u/csuaXO9neesCgYEA1u/I
Mlt/pxJkH+c3yOskwCh/CNhq9szi8G9LXROIQ58BT2ydJSEPpt7AhUTtQGimQQTW
KLiffJSkjuVaFckR1GjCoAmFGYw9wUb+TmFNScz5pJ2dXse8aBysAMIQfEIcRAEa
gKnkLBH6nw3+/Hm3xwoBc35t8Pa2ek7LsWDfbecCgYBhilQW4gVw+t49uf4Y2ZBA
G+pTbMx+mRXTrkYssFB5D+raOLZMqxVyUdoKLxkahpkkCxRDD1hN4JeE8Ta/jVSb
KUzQDKDJ3OybhOT86rgK4SpFXO/TXL9l+FmVT17WmZ3N1Fkjr7aM60pp5lYc/zo+
TUu5XjwwcjJsMcbZhj2u5QKBgQCDNuUn4PYAP9kCJPzIWs0XxmEvPDeorZIJqFgA
3XC9n2+EVlFlHlbYz3oGofqY7Io6fUJkn7k1q+T+G4QwcozA+KeAXe90lkoJGVcc
8IfnewwYc+RjvVoG0SIsYE0CHrX0yhus2oqiYON4gGnfJkuMZk5WfKOPjH4AEuSF
SBd+lwKBgQCHG/DA6u2mYmezPF9hebWFoyAVSr2PDXDhu8cNNHCpx9GewJXhuK/P
tW8mazHzUuJKBvmaUXDIXFh4K6FNhjH16p5jR1w3hsPE7NEZhjfVRaUYPmBqaOYR
jp8H+Sh5g4Rwbtfp6Qhu6UAKi/y6Vozs5GkJtSiNrjNDVrD+sGGrXA==
-----END RSA PRIVATE KEY-----
-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC3uaPiZMfjPBBE
yDEYJsJIoriu0SaC80uTmPM7bFpnOOXOBvbT4wD2q+uVaLQifKtPTgZAkP5Y3rX8
S5TOzaLsNp68S1Ja/EzxQUolOSgb4A948TTiUTrTjfMxsPRhmxXTjozWV8CFtL9P
Lg6H+55oyQOJedWYe1kSWRJQayXSweBK5qjOPi2qDF/xdFRQuMivpBUar/b/E9GQ
RKpIaoqMYsl/WF/tReb4N658UxkVlFdR8s48UoA9LfJLMPr4N+QDTfvtcT2bYlpT
4a9b6IXa9BQKCw3AKfTqEPO1XunH//iLNkt1bLtqgZNyT/tY0tLY3EKMXIDuRBVn
KCbfVJ1RAgMBAAECggEAJFCjXiqBgB7tMEtJuPZgTK8tRhC9RgEFHUWMPmCqdeC/
O7wQqc0i8Z8Fz+CESpTN370Sa0y9mZ9b5WSjI0VuQLaDJcDVpHpeUwmOuFDV5ryh
EkzLITjhIdPbECVkCK7433o7yFpMCaGydtopsSNBKoEhG9ljKOKotoG4pwCm10N5
K9Qepj82OjRhLkpmuiMFb4/vvOm5dglYmkq5+n/fdUYFtrYr3NvMSCTlietPHDgV
Wb3strvk1g9ARWfa2j7Q6moF2sbyob9zVLoRiD9VgmNB60v7QAJxDctVkbOoDgKp
uN2fkxTHwlOPAO6Zhgnie11jnZr1711TFxmEfMkSKQKBgQDqpB8m0hSJsWLKWxQK
yx+5Xgs+Cr8gb0AYHJQ87obj2XqwXLpBSMrkzTn6vIGRv+NMSfiM/755RUm5aJPY
om+7F68JEIL26ZA7bIfjHhV5o9fvpo+6N6cJyR08Q/KkF8Tej9K4qQec0W/jtKeZ
KAJ1k7/BBuN82iTtEJ3GWBaaRwKBgQDIcwQrGlyyXqnBK25gl/E1Ng+V3p/2sy98
1BpEshxen4KorHEXCJArydELtvK/ll6agil6QebrJN5dtYOOgvcDTu1mQjdUPN3C
VXpSQ0L8XxfyTNYQTWON9wJGL1pzlTiyHvlSrQFsFWMUoxrqndWIIRtrXjap1npp
HDrcqy2/pwKBgB5fHhUlTjlAd7wfq+l1v2Z8ENJ4C6NEIzS7xkhYy6cEiIf5iLZY
mMKi+eVFrzPRdbdzP7Poipwh5tgT/EcnR3UdLK/srjcNpni6pKA2TatQFOxVT/dX
qsxudtVNKkQpO3dfgHQclPqsdWIxCRye/CqB9Gkk3h9UEUGKTBHXZx2TAoGAF0tG
cLvfidr2Xzxs10zQ+x4NMZ1teX3ZRuhfJRyNr3FZ/cAMZGDaYDxTzsiz7Q/Mbqgx
qcN+0lS2gq1VXHpbukaxz/Bh/agVHUBRtr2aSznBzqafOcXEi/roiL94A3aT4B85
WiJAyA60NPG/bwRojClMxm1sbNA/6XceYAaEioECgYEA3m88G3UwizfJAsfT5H5K
3HXNYzQ1XGrA8shI0kxeqfNP5qmTfH5q/K2VMWeShT3F/9Ytgc+H8c9XP1wKq7Zl
6AtmdDOeLzHkgwVK0p20/Wh2Qjw4ikJLdM+y8wnfMiwCXWQxoh1X905EwNtyBc2Z
9S3G5CXldFHC4NGdx0vetiE=
-----END PRIVATE KEY-----
-----BEGIN CERTIFICATE-----
MIICqDCCAZACFBdaMnuT0pWhmrh05UT3HXJ+kI0yMA0GCSqGSIb3DQEBCwUAMA0x
CzAJBgNVBAMMAmNhMB4XDTIxMDQwNjE3MDQxNVoXDTIyMDQwNjE3MDQxNVowFDES
MBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKC
AQEAtz2fpa8hyUff8u8jYlh20HbkOO8hQi64Ke2Prack2Br0lhOr1MI6I8nVk5iD
rt+7ix2Cnt+2aZKb6HJv0CG1V25yWg+jgsXeIT1KHTJf8rTmYxhbt+ye+S1Z0h/R
t+xqSd9XXfzOLPGHYfyx6ZQ4AumO/HoEFD4IH/qiREjwtOfRXuhzCohqtUTyYR7p
JmZqBSuGac461WVRisnjfKRxeVa3itc84/RgktgYej2x4PQBFk13xAXKrWmHkwdg
WklTuuK8Gtoqz65Y4/J9CSl+Bd08QDdRnaVvq1u1eNTZg1BVyeRvjFYBMSathKAS
rng5nK66Fdilw6tO/9khaP0SDQIDAQABMA0GCSqGSIb3DQEBCwUAA4IBAQAct2If
isMLHIqyL9GjY4b0xcxF4svFU/DUwNanStmoFMW1ifPf1cCqeMzyQOxBCDdMs0RT
hBbDYHW0BMXDqYIr3Ktbu38/3iVyr3pb56YOCKy8yHXpmKEaUBhCknSLcQyvNfeS
tM+DWsKFTZfyR5px+WwXbGKVMYwLaTON+/wcv1MeKMig3CxluaCpEJVYYwAiUc4K
sgvQNAunwGmPLPoXtUnpR2ZWiQA5R6yjS1oIe+8vpryFP6kjhWs0HR0jZEtLulV5
WXUuxkqTXiBIvYpsmusoR44e9rptwLbV1wL/LUScRt9ttqFM3N5/Pof+2UwkSjGB
GAyPmw0Pkqtt+lva
MIIDazCCAlOgAwIBAgIUO9pfiBMsADdk9nBMHs10n8kaIr8wDQYJKoZIhvcNAQEL
BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM
GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yMjA0MTIwOTQxNDVaFw0yNTAx
MDUwOTQxNDVaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggEiMA0GCSqGSIb3DQEB
AQUAA4IBDwAwggEKAoIBAQC3uaPiZMfjPBBEyDEYJsJIoriu0SaC80uTmPM7bFpn
OOXOBvbT4wD2q+uVaLQifKtPTgZAkP5Y3rX8S5TOzaLsNp68S1Ja/EzxQUolOSgb
4A948TTiUTrTjfMxsPRhmxXTjozWV8CFtL9PLg6H+55oyQOJedWYe1kSWRJQayXS
weBK5qjOPi2qDF/xdFRQuMivpBUar/b/E9GQRKpIaoqMYsl/WF/tReb4N658UxkV
lFdR8s48UoA9LfJLMPr4N+QDTfvtcT2bYlpT4a9b6IXa9BQKCw3AKfTqEPO1XunH
//iLNkt1bLtqgZNyT/tY0tLY3EKMXIDuRBVnKCbfVJ1RAgMBAAGjUzBRMB0GA1Ud
DgQWBBSx7Tx8W4c6wjW0qkeG7CAMLY7YkjAfBgNVHSMEGDAWgBSx7Tx8W4c6wjW0
qkeG7CAMLY7YkjAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQAb
/Up/LEIdwhiN/S3HolxY2D2BrTpKHLQuggBN4+gZlK5OksCkM46LYlP/ruHXCxbR
mQoRhmooj4TvkKyBwzvKq76O+OuRtBhXzRipnBbNTqFPLf9enJUrut8lsFrI+pdl
Nn4PSGGbFPpQ5vFRCktczwwYh0zLuZ/1DbFsbRWlDnZdvoWZdfV0qsvcBRK2DXDI
29xSfw897OpITIkaryZigQVsKv8TXhfsaq9PUuH0/z84S82QG5fR6FzULofgkylb
wXvwaSdcu3k4Lo8j77BEAEvlH8Ynja0eojx5Avl9h4iw/IOQKE4GAg56CzcequLv
clPlaBBWoD6yn+q4NhLF
-----END CERTIFICATE-----

View File

@ -76,7 +76,7 @@ def check_args_and_update_paths(args):
args.src_dir = os.path.abspath(os.path.join(CURRENT_WORK_DIR, args.dockerd_volume))
if (not os.path.exists(os.path.join(args.base_configs_dir, "config.xml"))) and (not os.path.exists(os.path.join(args.base_configs_dir, "config.yaml"))):
raise Exception("No configs.xml or configs.yaml in {}".format(args.base_configs_dir))
raise Exception("No config.xml or config.yaml in {}".format(args.base_configs_dir))
if (not os.path.exists(os.path.join(args.base_configs_dir, "users.xml"))) and (not os.path.exists(os.path.join(args.base_configs_dir, "users.yaml"))):
raise Exception("No users.xml or users.yaml in {}".format(args.base_configs_dir))
@ -279,7 +279,7 @@ if __name__ == "__main__":
cmd = "docker run {net} {tty} --rm --name {name} --privileged \
--volume={odbc_bridge_bin}:/clickhouse-odbc-bridge --volume={bin}:/clickhouse \
--volume={library_bridge_bin}:/clickhouse-library-bridge --volume={bin}:/clickhouse \
--volume={library_bridge_bin}:/clickhouse-library-bridge \
--volume={base_cfg}:/clickhouse-config --volume={cases_dir}:/ClickHouse/tests/integration \
--volume={src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos \
--volume=/run:/run/host:ro \

View File

@ -149,7 +149,26 @@ def test_wrong_cluster(started_cluster):
SELECT count(*) from s3Cluster(
'non_existent_cluster',
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')"""
'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
"""
)
assert "not found" in error
def test_ambiguous_join(started_cluster):
node = started_cluster.instances["s0_0_0"]
result = node.query(
"""
SELECT l.name, r.value from s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as l
JOIN s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') as r
ON l.name = r.name
"""
)
assert "AMBIGUOUS_COLUMN_NAME" not in result

View File

@ -554,6 +554,17 @@ def test_insert_select_schema_inference(started_cluster):
assert int(result) == 1
def test_cluster_join(started_cluster):
result = node1.query(
"""
SELECT l.id,r.id FROM hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as l
JOIN hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') as r
ON l.id = r.id
"""
)
assert "AMBIGUOUS_COLUMN_NAME" not in result
def test_virtual_columns_2(started_cluster):
hdfs_api = started_cluster.hdfs_api

View File

@ -1135,6 +1135,76 @@ def test_kafka_consumer_hang2(kafka_cluster):
kafka_delete_topic(admin_client, topic_name)
# Sequential read from different consumers breaks a lot of kafka invariants
# (the first consumer will initially get all partitions and may have problems doing polls every 60 sec)
def test_kafka_read_consumers_in_parallel(kafka_cluster):
admin_client = KafkaAdminClient(
bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)
)
topic_name = "read_consumers_in_parallel"
kafka_create_topic(admin_client, topic_name, num_partitions=8)
cancel = threading.Event()
def produce():
while not cancel.is_set():
messages = []
for _ in range(100):
messages.append(json.dumps({"key": 0, "value": 0}))
kafka_produce(kafka_cluster, "read_consumers_in_parallel", messages)
time.sleep(1)
kafka_thread = threading.Thread(target=produce)
kafka_thread.start()
# When we have more than 1 consumer in a single table
# and kafka_thread_per_consumer=0,
# all the consumers should be read in parallel, not in sequence.
# When reading 8 consumers in parallel with a 1 second kafka_poll_timeout_ms and a limit below 1 sec,
# we should get exactly 1 poll per consumer (i.e. 8 polls) every second (from different threads);
# if parallel consuming is not working, we will get only 1 poll every second (from the same thread).
instance.query(
f"""
DROP TABLE IF EXISTS test.kafka;
DROP TABLE IF EXISTS test.view;
DROP TABLE IF EXISTS test.consumer;
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka1:19092',
kafka_topic_list = '{topic_name}',
kafka_group_name = '{topic_name}',
kafka_format = 'JSONEachRow',
kafka_num_consumers = 8,
kafka_thread_per_consumer = 0,
kafka_poll_timeout_ms = 1000,
kafka_flush_interval_ms = 999;
CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = Memory();
CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.kafka;
"""
)
instance.wait_for_log_line(
"kafka.*Polled batch of [0-9]+.*read_consumers_in_parallel",
repetitions=64,
look_behind_lines=100,
timeout=30, # we should get 64 polls in ~8 seconds, but when read sequentially it will take more than 64 sec
)
cancel.set()
kafka_thread.join()
instance.query(
"""
DROP TABLE test.consumer;
DROP TABLE test.view;
DROP TABLE test.kafka;
"""
)
kafka_delete_topic(admin_client, topic_name)
def test_kafka_csv_with_delimiter(kafka_cluster):
messages = []
for i in range(50):

View File

@ -1407,3 +1407,24 @@ def test_insert_select_schema_inference(started_cluster):
f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')"
)
assert int(result) == 1
def test_parallel_reading_with_memory_limit(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
instance.query(
f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') select * from numbers(100000)"
)
result = instance.query_and_get_error(
f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_memory_limit.native') settings max_memory_usage=10000"
)
assert "Memory limit (for query) exceeded" in result
sleep(5)
# Check that server didn't crash
result = instance.query("select 1")
assert int(result) == 1

View File

@ -1,4 +1,5 @@
#!/usr/bin/env bash
# Tags: no-parallel
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace
@ -9,4 +10,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e
# No log lines without query id
$CLICKHOUSE_CLIENT --query_id=hello --query="SELECT count() FROM numbers(10)" 2>&1 | grep -vF ' {hello} ' | grep -P '<\w+>' ||:
$CLICKHOUSE_CLIENT --query_id=hello_00971 --query="SELECT count() FROM numbers(10)" 2>&1 | grep -vF ' {hello_00971} ' | grep -P '<\w+>' ||:

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: race
# Tags: race, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@ -9,12 +9,12 @@ set -e
function thread1()
{
$CLICKHOUSE_CLIENT --query_id=hello --query "SELECT count() FROM numbers(1000000000)" --format Null;
$CLICKHOUSE_CLIENT --query_id=hello_01003 --query "SELECT count() FROM numbers(1000000000)" --format Null;
}
function thread2()
{
$CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id = 'hello'" --format Null
$CLICKHOUSE_CLIENT --query "KILL QUERY WHERE query_id = 'hello_01003'" --format Null
sleep 0.$RANDOM
}

View File

@ -1,5 +1,7 @@
#!/usr/bin/env bash
# Tags: no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
@ -12,7 +14,7 @@ function run_selects()
{
thread_num=$1
readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT database || '.' || name FROM system.tables
WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name!='zookeeper' and name!='merge_tree_metadata_cache'
WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name!='zookeeper' and name!='merge_tree_metadata_cache'
AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num")
for t in "${tables_arr[@]}"

View File

@ -11,6 +11,7 @@ set -e
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src";
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst";
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv";
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tmp";
$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n Int8, m Int8, CONSTRAINT c CHECK xxHash32(n+m) % 8 != 0) ENGINE=MergeTree ORDER BY n PARTITION BY 0 < n SETTINGS old_parts_lifetime=0";
$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (nm Int16, CONSTRAINT c CHECK xxHash32(nm) % 8 != 0) ENGINE=MergeTree ORDER BY nm SETTINGS old_parts_lifetime=0";
$CLICKHOUSE_CLIENT --query "CREATE MATERIALIZED VIEW mv TO dst (nm Int16) AS SELECT n*m AS nm FROM src";
@ -154,3 +155,4 @@ $CLICKHOUSE_CLIENT --query "SELECT count(), sum(nm) FROM mv"
$CLICKHOUSE_CLIENT --query "DROP TABLE src";
$CLICKHOUSE_CLIENT --query "DROP TABLE dst";
$CLICKHOUSE_CLIENT --query "DROP TABLE mv";
$CLICKHOUSE_CLIENT --query "DROP TABLE tmp";

View File

@ -6,7 +6,7 @@
3 all_1_1_0 0
3 all_3_3_0 1
4 all_1_1_0 1 (0,0,'00000000-0000-0000-0000-000000000000') 0
4 all_2_2_0 18446744073709551615 (1,1,'00000000-0000-0000-0000-000000000000') 0
4 all_2_2_0 18446744073709551615 (0,0,'00000000-0000-0000-0000-000000000000') 0
4 all_3_3_0 0 (0,0,'00000000-0000-0000-0000-000000000000') 0
5 1
6 all_1_1_0 0
@ -19,7 +19,6 @@
1 1 AddPart 1 1 1 1 all_1_1_0
2 1 Begin 1 1 1 1
2 1 AddPart 1 1 1 1 all_2_2_0
1 1 LockPart 1 1 1 1 all_2_2_0
2 1 Rollback 1 1 1 1
3 1 Begin 1 1 1 1
3 1 AddPart 1 1 1 1 all_3_3_0

View File

@ -27,6 +27,11 @@ expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\\G;\r"
expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\\\r"
expect ":-] "
send -- ", 2\r"
@ -41,6 +46,14 @@ expect "1: 1"
expect "2: 2"
expect ":) "
send -- "SELECT 1\\\r"
expect ":-] "
send -- ", 2\\G;\r"
expect "Row 1:"
expect "1: 1"
expect "2: 2"
expect ":) "
send -- ""
expect eof
@ -56,6 +69,11 @@ expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\\G;\r"
expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1; \r"
expect "│ 1 │"
expect ":) "
@ -65,6 +83,11 @@ expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\\G; \r"
expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\r"
expect ":-] "
send -- ";\r"
@ -78,6 +101,13 @@ expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\r"
expect ":-] "
send -- "\\G;\r"
expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\r"
expect ":-] "
send -- ", 2;\r"
@ -92,5 +122,14 @@ expect "1: 1"
expect "2: 2"
expect ":) "
send -- "SELECT 1\r"
expect ":-] "
send -- ", 2\\G;\r"
expect "Row 1:"
expect "1: 1"
expect "2: 2"
expect ":) "
send -- ""
expect eof

View File

@ -23,6 +23,12 @@ expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\\G;\r"
expect "Row 1:"
expect "1: 1"
expect ":) "
send -- "SELECT 1\\\r"
expect ":-] "
send -- ", 2\r"
@ -37,5 +43,14 @@ expect "1: 1"
expect "2: 2"
expect ":) "
send -- "SELECT 1\\\r"
expect ":-] "
send -- ", 2\\G;\r"
expect "Row 1:"
expect "1: 1"
expect "2: 2"
expect ":) "
send -- ""
expect eof

View File

@ -1,5 +1,18 @@
do not print any ProfileEvents packets
0
100000
print only last (and also number of rows to provide more info in case of failures)
[ 0 ] SelectedRows: 131010 (increment)
regression test for incorrect filtering out snapshots
0
regression test for overlap profile events snapshots between queries
[ 0 ] SelectedRows: 1 (increment)
[ 0 ] SelectedRows: 1 (increment)
regression test for overlap profile events snapshots between queries (clickhouse-local)
[ 0 ] SelectedRows: 1 (increment)
[ 0 ] SelectedRows: 1 (increment)
print everything
OK
print each 100 ms
OK
check that ProfileEvents is new for each query
OK

View File

@ -4,13 +4,30 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# do not print any ProfileEvents packets
echo 'do not print any ProfileEvents packets'
$CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows'
# print only last (and also number of rows to provide more info in case of failures)
$CLICKHOUSE_CLIENT --max_block_size=65505 --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception) 1> >(wc -l)
# print everything
echo 'print only last (and also number of rows to provide more info in case of failures)'
$CLICKHOUSE_CLIENT --max_block_size=65505 --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' |& grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception
echo 'regression test for incorrect filtering out snapshots'
$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' >& /dev/null
echo $?
echo 'regression test for overlap profile events snapshots between queries'
$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)'
echo 'regression test for overlap profile events snapshots between queries (clickhouse-local)'
$CLICKHOUSE_LOCAL --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)'
echo 'print everything'
profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')"
test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)"
# print each 100 ms
echo 'print each 100 ms'
profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')"
test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)"
echo 'check that ProfileEvents is new for each query'
sleep_function_calls=$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -n -q 'select sleep(1); select 1' |& grep -c 'SleepFunctionCalls')
test "$sleep_function_calls" -eq 1 && echo OK || echo "FAIL ($sleep_function_calls)"

View File

@ -0,0 +1,240 @@
0 0
0.5 0
1 original 1
1.5 0
2 0
2.5 0
3 0
3.5 0
4 original 4
4.5 0
5 0
5.5 0
6 0
6.5 0
7 original 7
7.5 0
8 0
8.5 0
9 0
9.5 0
10 0
10.5 0
11 0
11.5 0
0 42
0.5 42
1 original 1
1.5 42
2 42
2.5 42
3 42
3.5 42
4 original 4
4.5 42
5 42
5.5 42
6 42
6.5 42
7 original 7
7.5 42
8 42
8.5 42
9 42
9.5 42
10 42
10.5 42
11 42
11.5 42
0 0
0.5 0
1 original 1
1.5 1
2 1
2.5 1
3 1
3.5 1
4 original 4
4.5 4
5 4
5.5 4
6 4
6.5 4
7 original 7
7.5 7
8 7
8.5 7
9 7
9.5 7
10 7
10.5 7
11 7
11.5 7
0 1
0.5 2
1 original 1
1.5 2
2 3
2.5 4
3 5
3.5 6
4 original 4
4.5 5
5 6
5.5 7
6 8
6.5 9
7 original 7
7.5 8
8 9
8.5 10
9 11
9.5 12
10 13
10.5 14
11 15
11.5 16
0 1
0.5 2
1 original 2
1.5 3
2 4
2.5 5
3 6
3.5 7
4 original 5
4.5 6
5 7
5.5 8
6 9
6.5 10
7 original 8
7.5 9
8 10
8.5 11
9 12
9.5 13
10 14
10.5 15
11 16
11.5 17
0
0
original 1
3
3
3
3
3
original 4
9
9
9
9
9
original 7
15
15
15
15
15
15
15
15
15
0 0
0.5 0
1 original 1
1.5 3
2 3
2.5 3
3 3
3.5 3
4 original 4
4.5 9
5 9
5.5 9
6 9
6.5 9
7 original 7
7.5 15
8 15
8.5 15
9 15
9.5 15
10 15
10.5 15
11 15
11.5 15
0 1
0.5 2
1 original 1
1.5 2
2 3
2.5 4
3 5
3.5 6
4 original 4
4.5 5
5 6
5.5 7
6 8
6.5 9
7 original 7
7.5 8
8 9
8.5 10
9 11
9.5 12
10 13
10.5 14
11 15
11.5 16
0 \N
0.5 \N
1 original \N
1.5 \N
2 \N
2.5 \N
3 \N
3.5 \N
4 original \N
4.5 \N
5 \N
5.5 \N
6 \N
6.5 \N
7 original \N
7.5 \N
8 \N
8.5 \N
9 \N
9.5 \N
10 \N
10.5 \N
11 \N
11.5 \N
0 \N
0.5 \N
1 original \N
1.5 \N
2 \N
2.5 \N
3 \N
3.5 \N
4 original \N
4.5 \N
5 \N
5.5 \N
6 \N
6.5 \N
7 original \N
7.5 \N
8 \N
8.5 \N
9 \N
9.5 \N
10 \N
10.5 \N
11 \N
11.5 \N

View File

@ -0,0 +1,72 @@
# Test WITH FILL without INTERPOLATE
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5;
# Test INTERPOLATE with const
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS 42);
# Test INTERPOLATE with field value
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS inter);
# Test INTERPOLATE with expression
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS inter + 1);
# Test INTERPOLATE with incompatible const - should produce error
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS 'inter'); -- { serverError 6 }
# Test INTERPOLATE with incompatible expression - should produce error
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS inter||'inter'); -- { serverError 44 }
# Test INTERPOLATE with column from WITH FILL expression - should produce error
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (n AS n); -- { serverError 475 }
# Test INTERPOLATE with inconsistent column - should produce error
SELECT n, source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number as inter FROM numbers(10) WHERE number % 3 = 1
) ORDER BY n WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE (inter AS source); -- { serverError 32 }
# Test INTERPOLATE with aliased column
SELECT n, source, inter + 1 AS inter_p FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter FROM numbers(10) WHERE (number % 3) = 1
) ORDER BY n ASC WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE ( inter_p AS inter_p + 1 );
# Test INTERPOLATE with column not present in select
SELECT source, inter FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter, number + 1 AS inter2 FROM numbers(10) WHERE (number % 3) = 1
) ORDER BY n ASC WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE ( inter AS inter2 + inter );
# Test INTERPOLATE in sub-select
SELECT n, source, inter FROM (
SELECT n, source, inter, inter2 FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter, number + 1 AS inter2 FROM numbers(10) WHERE (number % 3) = 1
) ORDER BY n ASC WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE ( inter AS inter + inter2 )
);
# Test INTERPOLATE with aggregates
SELECT n, any(source), sum(inter) AS inter_s FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter FROM numbers(10) WHERE (number % 3) = 1
) GROUP BY n
ORDER BY n ASC WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE ( inter_s AS inter_s + 1 );
# Test INTERPOLATE with Nullable in result
SELECT n, source, inter + NULL AS inter_p FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number AS inter FROM numbers(10) WHERE (number % 3) = 1
) ORDER BY n ASC WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE ( inter_p AS inter_p + 1 );
# Test INTERPOLATE with Nullable in source
SELECT n, source, inter AS inter_p FROM (
SELECT toFloat32(number % 10) AS n, 'original' AS source, number + NULL AS inter FROM numbers(10) WHERE (number % 3) = 1
) ORDER BY n ASC WITH FILL FROM 0 TO 11.51 STEP 0.5 INTERPOLATE ( inter_p AS inter_p + 1 );

View File

@ -0,0 +1,33 @@
1991-08-24 21:04:00
1991-08-24 21:04:00
1991-08-24 19:04:00
DateTime
DateTime(\'CET\')
1970-01-01 00:00:00
1970-01-01 00:00:00
2106-02-07 06:28:15
2106-02-07 06:28:15
2106-02-07 06:28:15
2106-02-07 06:28:15
2106-02-07 06:28:15
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1984-01-02 01:00:00
1984-01-01 01:10:00
1984-01-01 00:01:10
1984-01-01 00:00:00
1983-03-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
2106-02-07 06:28:15
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00
1970-01-01 00:00:00

View File

@ -0,0 +1,39 @@
select makeDateTime(1991, 8, 24, 21, 4, 0);
select makeDateTime(1991, 8, 24, 21, 4, 0, 'CET');
select cast(makeDateTime(1991, 8, 24, 21, 4, 0, 'CET') as DateTime('UTC'));
select toTypeName(makeDateTime(1991, 8, 24, 21, 4, 0));
select toTypeName(makeDateTime(1991, 8, 24, 21, 4, 0, 'CET'));
select makeDateTime(1925, 1, 1, 0, 0, 0, 'UTC');
select makeDateTime(1924, 12, 31, 23, 59, 59, 'UTC');
select makeDateTime(2283, 11, 11, 23, 59, 59, 'UTC');
select makeDateTime(2283, 11, 12, 0, 0, 0, 'UTC');
select makeDateTime(2262, 4, 11, 23, 47, 16, 'UTC');
select makeDateTime(2262, 4, 11, 23, 47, 17, 'UTC');
select makeDateTime(2262, 4, 11, 23, 47, 16, 'UTC');
select makeDateTime(1984, 0, 1, 0, 0, 0, 'UTC');
select makeDateTime(1984, 1, 0, 0, 0, 0, 'UTC');
select makeDateTime(1984, 13, 1, 0, 0, 0, 'UTC');
select makeDateTime(1984, 1, 41, 0, 0, 0, 'UTC');
select makeDateTime(1984, 1, 1, 25, 0, 0, 'UTC');
select makeDateTime(1984, 1, 1, 0, 70, 0, 'UTC');
select makeDateTime(1984, 1, 1, 0, 0, 70, 'UTC');
select makeDateTime(1984, 1, 1, 0, 0, 0, 'not a timezone'); -- { serverError 1000 }
select makeDateTime(1984, 1, 1, 0, 0, 0, 'UTC');
select makeDateTime(1983, 2, 29, 0, 0, 0, 'UTC');
select makeDateTime(-1984, 1, 1, 0, 0, 0, 'UTC');
select makeDateTime(1984, -1, 1, 0, 0, 0, 'UTC');
select makeDateTime(1984, 1, -1, 0, 0, 0, 'UTC');
select makeDateTime(1984, 1, 1, -1, 0, 0, 'UTC');
select makeDateTime(1984, 1, 1, 0, -1, 0, 'UTC');
select makeDateTime(1984, 1, 1, 0, 0, -1, 'UTC');
select makeDateTime(65537, 8, 24, 21, 4, 0, 'UTC');
select makeDateTime(1991, 65537, 24, 21, 4, 0, 'UTC');
select makeDateTime(1991, 8, 65537, 21, 4, 0, 'UTC');
select makeDateTime(1991, 8, 24, 65537, 4, 0, 'UTC');
select makeDateTime(1991, 8, 24, 21, 65537, 0, 'UTC');
select makeDateTime(1991, 8, 24, 21, 4, 65537, 'UTC');

View File

@ -0,0 +1,69 @@
1991-08-24 21:04:00.000
1991-08-24 21:04:00.123
1991-08-24 21:04:00.001234
1991-08-24 21:04:00.0001234
1991-08-24 19:04:00.0001234
DateTime64(3)
DateTime64(3)
DateTime64(6)
DateTime64(7, \'CET\')
DateTime64(7, \'UTC\')
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
2283-11-11 23:59:59.99999999
2262-04-11 23:47:16.854775807
2262-04-11 23:47:16.85477581
1991-08-24 21:04:00
1991-08-24 21:04:00.9
1991-08-24 21:04:00.99
1991-08-24 21:04:00.999
1991-08-24 21:04:00.1234
1991-08-24 21:04:00.01234
1991-08-24 21:04:00.001234
1991-08-24 21:04:00.0001234
1991-08-24 21:04:00.00001234
1991-08-24 21:04:00.000001234
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1984-01-02 01:00:00.000000000
1984-01-01 01:10:00.000000000
1984-01-01 00:01:10.000000000
1984-01-01 02:03:04.000000005
1984-02-29 02:03:04.000000005
1983-03-01 02:03:04.000000005
1984-03-01 02:03:04.000000005
1983-03-02 02:03:04.000000005
1984-03-02 02:03:04.000000005
1983-03-03 02:03:04.000000005
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1984-01-01 02:03:04.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1925-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
1984-01-01 00:00:00.000000000
2283-11-11 23:59:59.999
1925-01-01 00:00:00.000
1925-01-01 00:00:00.000
1925-01-01 00:00:00.000
1925-01-01 00:00:00.000
1925-01-01 00:00:00.000

View File

@ -0,0 +1,89 @@
select makeDateTime64(1991, 8, 24, 21, 4, 0);
select makeDateTime64(1991, 8, 24, 21, 4, 0, 123);
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 6);
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 7, 'CET');
select cast(makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 7, 'CET') as DateTime64(7, 'UTC'));
select toTypeName(makeDateTime64(1991, 8, 24, 21, 4, 0));
select toTypeName(makeDateTime64(1991, 8, 24, 21, 4, 0, 123));
select toTypeName(makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 6));
select toTypeName(makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 7, 'CET'));
select toTypeName(cast(makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 7, 'CET') as DateTime64(7, 'UTC')));
select makeDateTime64(1925, 1, 1, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1924, 12, 31, 23, 59, 59, 999999999, 9, 'UTC');
select makeDateTime64(2283, 11, 11, 23, 59, 59, 99999999, 8, 'UTC');
select makeDateTime64(2283, 11, 11, 23, 59, 59, 999999999, 9, 'UTC'); -- { serverError 407 }
select makeDateTime64(2262, 4, 11, 23, 47, 16, 854775807, 9, 'UTC');
select makeDateTime64(2262, 4, 11, 23, 47, 16, 854775808, 9, 'UTC'); -- { serverError 407 }
select makeDateTime64(2262, 4, 11, 23, 47, 16, 85477581, 8, 'UTC');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 0, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 1, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 2, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 3, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 4, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 5, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 6, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 7, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 8, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 9, 'CET');
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, 10, 'CET'); -- { serverError 69 }
select makeDateTime64(1991, 8, 24, 21, 4, 0, 1234, -1, 'CET'); -- { serverError 69 }
select makeDateTime64(1984, 0, 1, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 0, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 13, 1, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 41, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 25, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0, 70, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0, 0, 70, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0, 0, 0, 0, 9, 'not a timezone'); -- { serverError 1000 }
select makeDateTime64(1984, 1, 1, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 2, 29, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1983, 2, 29, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 2, 30, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1983, 2, 30, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 2, 31, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1983, 2, 31, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 2, 32, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1983, 2, 32, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(-1984, 1, 1, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, -1, 1, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, -1, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, -1, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 2, -1, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 2, 3, -1, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 2, 3, 4, -1, 9, 'UTC');
select makeDateTime64(NaN, 1, 1, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, NaN, 1, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, NaN, 2, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, NaN, 3, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 2, NaN, 4, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 2, 3, NaN, 5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 2, 3, 4, NaN, 9, 'UTC');
select makeDateTime64(1984.5, 1, 1, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1.5, 1, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1.5, 0, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0.5, 0, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0, 0.5, 0, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0, 0, 0.5, 0, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0, 0, 0, 0.5, 9, 'UTC');
select makeDateTime64(1984, 1, 1, 0, 0, 0, 0, 9.5, 'UTC');
select makeDateTime64(65537, 8, 24, 21, 4, 0);
select makeDateTime64(1991, 65537, 24, 21, 4, 0);
select makeDateTime64(1991, 8, 65537, 21, 4, 0);
select makeDateTime64(1991, 8, 24, 65537, 4, 0);
select makeDateTime64(1991, 8, 24, 21, 65537, 0);
select makeDateTime64(1991, 8, 24, 21, 4, 65537);
select makeDateTime64(year, 1, 1, 1, 0, 0, 0, precision, timezone) from (
select 1984 as year, 5 as precision, 'UTC' as timezone
union all
select 1985 as year, 5 as precision, 'UTC' as timezone
); -- { serverError 43 }
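Similarly, makeDateTime64 adds optional fraction, precision and time zone arguments: makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]). A small sketch with made-up values, mirroring the calls tested above:

SELECT
    makeDateTime64(2022, 4, 13, 12, 30, 0)                   AS dt64_default,    -- DateTime64(3), fraction .000
    makeDateTime64(2022, 4, 13, 12, 30, 0, 123)              AS dt64_millis,     -- .123 at the default precision 3
    makeDateTime64(2022, 4, 13, 12, 30, 0, 123456, 6, 'UTC') AS dt64_micros_utc; -- DateTime64(6, 'UTC'), .123456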

View File

@ -0,0 +1,10 @@
0 1
1 2
2 3
a 2
0 10000000
1 10000000
2 10000000
0 10000000
1 10000000
2 10000000

View File

@ -0,0 +1,16 @@
SELECT
number,
leadInFrame(number) OVER w AS W
FROM numbers(10)
WINDOW w AS (ORDER BY number ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
LIMIT 3;
WITH arrayJoin(['a', 'a', 'b', 'b']) AS field
SELECT
field,
count() OVER (PARTITION BY field)
ORDER BY field ASC
LIMIT 1;
select * from ( ( select *, count() over () cnt from ( select * from numbers(10000000) ) ) ) limit 3 ;
select * from ( ( select *, count() over () cnt from ( select * from numbers(10000000) ) ) ) order by number limit 3 ;
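One detail worth noting in the first query above: leadInFrame only looks at rows inside the window frame, so the frame has to extend past the current row (here via UNBOUNDED FOLLOWING) for it to return the next row's value rather than the type's default. A minimal sketch with an inline window definition (illustrative only):

SELECT
    number,
    leadInFrame(number) OVER (ORDER BY number ASC
                              ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS next_number
FROM numbers(5);
-- Yields 1..4 for the first four rows and 0 (the UInt64 default) for the last row,
-- because no following row remains inside the frame.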

View File

@ -9,11 +9,11 @@ FORMATS=('CSV' 'CSVWithNames')
for format in "${FORMATS[@]}"
do
echo "$format, false";
$CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \
$CLICKHOUSE_CLIENT --max_threads=0 --output_format_parallel_formatting=false -q \
"SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum
echo "$format, true";
$CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \
$CLICKHOUSE_CLIENT --max_threads=0 --output_format_parallel_formatting=true -q \
"SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum
done

View File

@ -11,10 +11,10 @@ FORMATS=('JSONEachRow' 'JSONCompactEachRow' 'JSONCompactStringsEachRow' 'JSONCom
for format in "${FORMATS[@]}"
do
echo "$format, false";
$CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \
$CLICKHOUSE_CLIENT --max_threads=0 --output_format_parallel_formatting=false -q \
"SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum
echo "$format, true";
$CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \
$CLICKHOUSE_CLIENT --max_threads=0 --output_format_parallel_formatting=true -q \
"SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum
done

View File

@ -13,9 +13,9 @@ do
$CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()"
echo "$format, false";
$CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \
$CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --output_format_parallel_formatting=false -q \
"SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \
$CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format"
$CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format"
$CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names"
@ -23,9 +23,9 @@ do
$CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()"
echo "$format, true";
$CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \
$CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --output_format_parallel_formatting=false -q \
"SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \
$CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format"
$CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format"
$CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names"

View File

@ -13,9 +13,9 @@ do
$CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()"
echo "$format, false";
$CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \
$CLICKHOUSE_CLIENT --max_threads=0 --output_format_parallel_formatting=false -q \
"SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \
$CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names SETTINGS input_format_null_as_default=0 FORMAT $format"
$CLICKHOUSE_CLIENT --max_threads=0 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names SETTINGS input_format_null_as_default=0 FORMAT $format"
$CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names"
@ -23,9 +23,9 @@ do
$CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()"
echo "$format, true";
$CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \
$CLICKHOUSE_CLIENT --max_threads=0 --output_format_parallel_formatting=false -q \
"SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \
$CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names SETTINGS input_format_null_as_default=0 FORMAT $format"
$CLICKHOUSE_CLIENT --max_threads=0 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names SETTINGS input_format_null_as_default=0 FORMAT $format"
$CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names"