2017-04-01 09:19:00 +00:00
|
|
|
|
#include <DataStreams/IBlockInputStream.h>
|
2011-09-04 21:23:19 +00:00
|
|
|
|
|
2019-10-04 17:46:36 +00:00
|
|
|
|
#include <Core/Field.h>
|
2019-01-23 14:48:50 +00:00
|
|
|
|
#include <Interpreters/ProcessList.h>
|
2019-11-04 19:17:27 +00:00
|
|
|
|
#include <Access/QuotaContext.h>
|
2019-01-23 14:48:50 +00:00
|
|
|
|
#include <Common/CurrentThread.h>
|
2019-07-10 20:47:39 +00:00
|
|
|
|
#include <common/sleep.h>
|
2019-01-23 14:48:50 +00:00
|
|
|
|
|
|
|
|
|
namespace ProfileEvents
{
    /// Time spent sleeping in the throttler; declared here for use by rate limiting.
    /// NOTE(review): not referenced in the visible part of this file — presumably used
    /// via limits.speed_limits.throttle(); confirm before removing.
    extern const Event ThrottlerSleepMicroseconds;
}
|
2018-06-05 19:46:49 +00:00
|
|
|
|
|
2011-09-04 21:23:19 +00:00
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
|
namespace ErrorCodes
{
    /// Error codes thrown by this translation unit (see usages below).
    extern const int QUERY_WAS_CANCELLED;
    extern const int OUTPUT_IS_NOT_SORTED;
    extern const int TOO_MANY_ROWS;
    extern const int TOO_MANY_BYTES;
    extern const int TOO_MANY_ROWS_OR_BYTES;
    extern const int LOGICAL_ERROR;
    extern const int TOO_DEEP_PIPELINE;
}
|
|
|
|
|
|
2019-10-04 17:46:36 +00:00
|
|
|
|
/// Base implementation: a generic stream gives no ordering guarantee, so asking
/// for its sort description is a caller error. Sorted streams override this.
const SortDescription & IBlockInputStream::getSortDescription() const
{
    throw Exception(
        "Output of " + getName() + " is not sorted",
        ErrorCodes::OUTPUT_IS_NOT_SORTED);
}
|
2012-03-05 02:34:20 +00:00
|
|
|
|
|
2019-01-23 14:48:50 +00:00
|
|
|
|
/// It's safe to access children without mutex as long as these methods are called before first call to `read()` or `readPrefix()`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Pull one block from the stream, enforcing cancellation, time limits, result
/// size limits and quotas around the call to readImpl().
/// Returns an empty Block when the stream is exhausted, cancelled, or a limit
/// was exceeded.
Block IBlockInputStream::read()
{
    /// Report the a-priori row estimate exactly once, before the first real read.
    if (total_rows_approx)
    {
        progressImpl(Progress(0, 0, total_rows_approx));
        total_rows_approx = 0;
    }

    /// Lazily start the stopwatch on the first read so elapsed time measures
    /// actual execution, not time between construction and first use.
    if (!info.started)
    {
        info.total_stopwatch.start();
        info.started = true;
    }

    Block res;

    /// Throws if the query was killed; returns empty block on plain cancellation.
    if (isCancelledOrThrowIfKilled())
        return res;

    if (!checkTimeLimit())
        limit_exceeded_need_break = true;

    if (!limit_exceeded_need_break)
        res = readImpl();

    if (res)
    {
        info.update(res);

        if (enabled_extremes)
            updateExtremes(res);

        /// LIMITS_CURRENT checks the size of this stream's own result;
        /// on soft overflow we stop reading instead of throwing.
        if (limits.mode == LIMITS_CURRENT && !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES))
            limit_exceeded_need_break = true;

        if (quota)
            checkQuota(res);
    }
    else
    {
        /** If the thread is over, then we will ask all children to abort the execution.
          * This makes sense when running a query with LIMIT
          * - there is a situation when all the necessary data has already been read,
          *   but children sources are still working,
          *   herewith they can work in separate threads or even remotely.
          */
        cancel(false);
    }

    /// An empty `res` contributes (0, 0), so progress is reported unconditionally.
    progress(Progress(res.rows(), res.bytes()));

#ifndef NDEBUG
    /// Debug-only sanity check: the produced block must match the declared header.
    if (res)
    {
        Block header = getHeader();
        if (header)
            assertBlocksHaveEqualStructure(res, header, getName());
    }
#endif

    return res;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void IBlockInputStream::readPrefix()
|
|
|
|
|
{
|
2019-02-06 21:40:49 +00:00
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
if (!read_prefix_is_called)
|
|
|
|
|
read_prefix_is_called = true;
|
|
|
|
|
else
|
2019-02-08 17:17:16 +00:00
|
|
|
|
throw Exception("readPrefix is called twice for " + getName() + " stream", ErrorCodes::LOGICAL_ERROR);
|
2019-02-06 21:40:49 +00:00
|
|
|
|
#endif
|
|
|
|
|
|
2019-01-23 14:48:50 +00:00
|
|
|
|
readPrefixImpl();
|
|
|
|
|
|
|
|
|
|
forEachChild([&] (IBlockInputStream & child)
|
|
|
|
|
{
|
|
|
|
|
child.readPrefix();
|
|
|
|
|
return false;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void IBlockInputStream::readSuffix()
|
|
|
|
|
{
|
2019-02-06 21:40:49 +00:00
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
if (!read_suffix_is_called)
|
|
|
|
|
read_suffix_is_called = true;
|
|
|
|
|
else
|
2019-02-08 15:21:06 +00:00
|
|
|
|
throw Exception("readSuffix is called twice for " + getName() + " stream", ErrorCodes::LOGICAL_ERROR);
|
2019-02-06 21:40:49 +00:00
|
|
|
|
#endif
|
|
|
|
|
|
2019-01-23 14:48:50 +00:00
|
|
|
|
forEachChild([&] (IBlockInputStream & child)
|
|
|
|
|
{
|
|
|
|
|
child.readSuffix();
|
|
|
|
|
return false;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
readSuffixImpl();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Maintain the running `extremes` block (row 0 = min, row 1 = max per column)
/// over all blocks seen so far.
void IBlockInputStream::updateExtremes(Block & block)
{
    size_t num_columns = block.columns();

    /// First block: build the two-row extremes block from scratch.
    if (!extremes)
    {
        MutableColumns extremes_columns(num_columns);

        for (size_t i = 0; i < num_columns; ++i)
        {
            const ColumnPtr & src = block.safeGetByPosition(i).column;

            if (isColumnConst(*src))
            {
                /// Equal min and max.
                extremes_columns[i] = src->cloneResized(2);
            }
            else
            {
                Field min_value;
                Field max_value;

                src->getExtremes(min_value, max_value);

                extremes_columns[i] = src->cloneEmpty();

                extremes_columns[i]->insert(min_value);
                extremes_columns[i]->insert(max_value);
            }
        }

        extremes = block.cloneWithColumns(std::move(extremes_columns));
    }
    /// Subsequent blocks: widen the stored [min, max] per column in place.
    else
    {
        for (size_t i = 0; i < num_columns; ++i)
        {
            ColumnPtr & old_extremes = extremes.safeGetByPosition(i).column;

            /// A constant column's extremes never change — skip it.
            if (isColumnConst(*old_extremes))
                continue;

            Field min_value = (*old_extremes)[0];
            Field max_value = (*old_extremes)[1];

            Field cur_min_value;
            Field cur_max_value;

            block.safeGetByPosition(i).column->getExtremes(cur_min_value, cur_max_value);

            if (cur_min_value < min_value)
                min_value = cur_min_value;
            if (cur_max_value > max_value)
                max_value = cur_max_value;

            /// Columns are immutable (COW) — build a fresh two-row column and swap it in.
            MutableColumnPtr new_extremes = old_extremes->cloneEmpty();

            new_extremes->insert(min_value);
            new_extremes->insert(max_value);

            old_extremes = std::move(new_extremes);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
2020-01-27 17:12:55 +00:00
|
|
|
|
bool IBlockInputStream::checkTimeLimit()
|
|
|
|
|
{
|
|
|
|
|
return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2019-01-23 14:48:50 +00:00
|
|
|
|
/// Account the produced block against the quota.
/// Only relevant in LIMITS_CURRENT mode; in LIMITS_TOTAL mode the accounting
/// happens in the `progress` method instead.
void IBlockInputStream::checkQuota(Block & block)
{
    if (limits.mode != LIMITS_CURRENT)
        return;

    UInt64 total_elapsed = info.total_stopwatch.elapsedNanoseconds();
    /// Charge only the time elapsed since the previous accounting call.
    quota->used({Quota::RESULT_ROWS, block.rows()}, {Quota::RESULT_BYTES, block.bytes()}, {Quota::EXECUTION_TIME, total_elapsed - prev_elapsed});
    prev_elapsed = total_elapsed;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Propagate a progress increment: invoke the user callback, update the
/// process-list entry, and enforce read limits, throttling and quotas.
void IBlockInputStream::progressImpl(const Progress & value)
{
    if (progress_callback)
        progress_callback(value);

    if (process_list_elem)
    {
        /// updateProgressIn returns false when the query was killed externally.
        if (!process_list_elem->updateProgressIn(value))
            cancel(/* kill */ true);

        /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers.

        ProgressValues progress = process_list_elem->getProgressIn();
        /// `read_rows` may already exceed the estimate, so take the larger of the two.
        size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read);

        /** Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read.
          * NOTE: Maybe it makes sense to have them checked directly in ProcessList?
          */
        if (limits.mode == LIMITS_TOTAL)
        {
            /// Soft overflow mode stops reading (cancel) rather than throwing.
            if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read",
                                          ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
                cancel(false);
        }

        size_t total_rows = progress.total_rows_to_read;

        /// Refresh thread performance counters at most once per 10 ms to keep overhead low.
        constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
        UInt64 total_elapsed_microseconds = info.total_stopwatch.elapsedMicroseconds();

        if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
        {
            CurrentThread::updatePerformanceCounters();
            last_profile_events_update_time = total_elapsed_microseconds;
        }

        /// May sleep to keep the read rate within the configured speed limits.
        limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);

        if (quota && limits.mode == LIMITS_TOTAL)
            quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes});
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void IBlockInputStream::cancel(bool kill)
|
|
|
|
|
{
|
|
|
|
|
if (kill)
|
|
|
|
|
is_killed = true;
|
|
|
|
|
|
|
|
|
|
bool old_val = false;
|
|
|
|
|
if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
forEachChild([&] (IBlockInputStream & child)
|
|
|
|
|
{
|
|
|
|
|
child.cancel(kill);
|
|
|
|
|
return false;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Whether cancel() has been called on this stream (directly or via a parent).
bool IBlockInputStream::isCancelled() const
{
    return is_cancelled;
}
|
|
|
|
|
|
|
|
|
|
bool IBlockInputStream::isCancelledOrThrowIfKilled() const
|
|
|
|
|
{
|
|
|
|
|
if (!is_cancelled)
|
|
|
|
|
return false;
|
|
|
|
|
if (is_killed)
|
|
|
|
|
throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void IBlockInputStream::setProgressCallback(const ProgressCallback & callback)
|
|
|
|
|
{
|
|
|
|
|
progress_callback = callback;
|
|
|
|
|
|
|
|
|
|
forEachChild([&] (IBlockInputStream & child)
|
|
|
|
|
{
|
|
|
|
|
child.setProgressCallback(callback);
|
|
|
|
|
return false;
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Attach the query's process-list entry to this stream and all children,
/// enabling progress reporting and external cancellation.
void IBlockInputStream::setProcessListElement(QueryStatus * elem)
{
    process_list_elem = elem;

    forEachChild([elem] (IBlockInputStream & child_stream)
    {
        child_stream.setProcessListElement(elem);
        return false;   /// visit every child
    });
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Return this stream's totals block if it has one; otherwise the first
/// non-empty totals found among the children (depth-first).
Block IBlockInputStream::getTotals()
{
    if (totals)
        return totals;

    Block found;
    forEachChild([&found] (IBlockInputStream & child)
    {
        found = child.getTotals();
        return static_cast<bool>(found);   /// stop traversal once totals are found
    });
    return found;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Return this stream's extremes block if it has one; otherwise the first
/// non-empty extremes found among the children (depth-first).
Block IBlockInputStream::getExtremes()
{
    if (extremes)
        return extremes;

    Block found;
    forEachChild([&found] (IBlockInputStream & child)
    {
        found = child.getExtremes();
        return static_cast<bool>(found);   /// stop traversal once extremes are found
    });
    return found;
}
|
2018-02-23 10:43:24 +00:00
|
|
|
|
|
|
|
|
|
|
2012-06-25 03:42:08 +00:00
|
|
|
|
/// Build a textual identifier of this subtree: "Name(child1, child2, ...)".
/// Equal subtrees yield equal IDs, which dumpTree() uses to collapse duplicates.
String IBlockInputStream::getTreeID() const
{
    std::stringstream buf;
    buf << getName();

    if (!children.empty())
    {
        buf << "(";
        bool first = true;
        for (const auto & child : children)
        {
            if (!first)
                buf << ", ";
            first = false;
            buf << child->getTreeID();
        }
        buf << ")";
    }

    return buf.str();
}
|
|
|
|
|
|
2012-12-25 20:36:35 +00:00
|
|
|
|
|
2012-12-26 20:29:28 +00:00
|
|
|
|
/// Compute the depth of this subtree, throwing TOO_DEEP_PIPELINE once the
/// recursion descends past max_depth. Leaves have depth 0.
size_t IBlockInputStream::checkDepthImpl(size_t max_depth, size_t level) const
{
    if (children.empty())
        return 0;

    if (level > max_depth)
        throw Exception("Query pipeline is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_PIPELINE);

    /// Depth of this node is one more than the deepest child.
    size_t deepest = 0;
    for (const auto & child : children)
    {
        size_t child_depth = child->checkDepth(level + 1);
        if (child_depth > deepest)
            deepest = child_depth;
    }

    return deepest + 1;
}
|
|
|
|
|
|
2016-08-13 01:57:35 +00:00
|
|
|
|
|
2018-08-31 01:46:39 +00:00
|
|
|
|
/// Print the stream tree to `ostr`, one node per line, indenting children.
/// Identical sibling subtrees (same getTreeID()) are printed once with a
/// "× N" multiplier instead of being repeated.
void IBlockInputStream::dumpTree(std::ostream & ostr, size_t indent, size_t multiplier) const
{
    ostr << String(indent, ' ') << getName();
    if (multiplier > 1)
        ostr << " × " << multiplier;
    //ostr << ": " << getHeader().dumpStructure();
    ostr << std::endl;
    ++indent;

    /// If the subtree is repeated several times, then we output it once with the multiplier.
    using Multipliers = std::map<String, size_t>;
    Multipliers multipliers;

    /// First pass: count how many siblings share each subtree ID.
    for (const auto & child : children)
        ++multipliers[child->getTreeID()];

    /// Second pass: print each distinct subtree once, with its count.
    for (const auto & child : children)
    {
        String id = child->getTreeID();
        size_t & subtree_multiplier = multipliers[id];
        if (subtree_multiplier != 0)    /// Already printed subtrees are marked with zero in the array of multipliers.
        {
            child->dumpTree(ostr, indent, subtree_multiplier);
            subtree_multiplier = 0;
        }
    }
}
|
|
|
|
|
|
2011-09-04 21:23:19 +00:00
|
|
|
|
}
|