mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
dbms: development [#CONV-2944].
This commit is contained in:
parent
b2a826727e
commit
987784ed80
@ -62,6 +62,9 @@ public:
|
|||||||
|
|
||||||
operator bool() const { return !data.empty(); }
|
operator bool() const { return !data.empty(); }
|
||||||
bool operator!() const { return data.empty(); }
|
bool operator!() const { return data.empty(); }
|
||||||
|
|
||||||
|
/** Получить список имён столбцов через запятую. */
|
||||||
|
std::string dumpNames() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
41
dbms/include/DB/DataStreams/ExpressionBlockInputStream.h
Normal file
41
dbms/include/DB/DataStreams/ExpressionBlockInputStream.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Poco/SharedPtr.h>
|
||||||
|
|
||||||
|
#include <DB/Interpreters/Expression.h>
|
||||||
|
#include <DB/DataStreams/IBlockInputStream.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
using Poco::SharedPtr;
|
||||||
|
|
||||||
|
|
||||||
|
/** Выполняет над блоком вычисление некоторого выражения.
|
||||||
|
* Выражение состоит из идентификаторов столбцов из блока, констант, обычных функций.
|
||||||
|
* Например: hits * 2 + 3, instr("yandex", url)
|
||||||
|
* Выражение не меняет количество строк в потоке, и обрабатывает каждую строку независимо от других.
|
||||||
|
*/
|
||||||
|
class ExpressionBlockInputStream : public IBlockInputStream
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ExpressionBlockInputStream(SharedPtr<IBlockInputStream> input_, SharedPtr<Expression> expression_)
|
||||||
|
: input(input_), expression(expression_) {}
|
||||||
|
|
||||||
|
Block read()
|
||||||
|
{
|
||||||
|
Block res = input->read();
|
||||||
|
if (!res)
|
||||||
|
return res;
|
||||||
|
|
||||||
|
expression->execute(res);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
SharedPtr<IBlockInputStream> input;
|
||||||
|
SharedPtr<Expression> expression;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -29,6 +29,10 @@ public:
|
|||||||
*/
|
*/
|
||||||
void execute(Block & block);
|
void execute(Block & block);
|
||||||
|
|
||||||
|
/** Получить список типов столбцов результата.
|
||||||
|
*/
|
||||||
|
DataTypes getReturnTypes();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ASTPtr ast;
|
ASTPtr ast;
|
||||||
const Context & context;
|
const Context & context;
|
||||||
@ -67,6 +71,8 @@ private:
|
|||||||
Block projectResult(ASTPtr ast, Block & block);
|
Block projectResult(ASTPtr ast, Block & block);
|
||||||
|
|
||||||
void collectFinalColumns(ASTPtr ast, Block & src, Block & dst);
|
void collectFinalColumns(ASTPtr ast, Block & src, Block & dst);
|
||||||
|
|
||||||
|
void getReturnTypesImpl(ASTPtr ast, DataTypes & res);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,7 +44,8 @@ void Block::rebuildIndexByPosition()
|
|||||||
void Block::insert(size_t position, const ColumnWithNameAndType & elem)
|
void Block::insert(size_t position, const ColumnWithNameAndType & elem)
|
||||||
{
|
{
|
||||||
if (position >= index_by_position.size())
|
if (position >= index_by_position.size())
|
||||||
throw Exception("Position out of bound in Block::insert()", ErrorCodes::POSITION_OUT_OF_BOUND);
|
throw Exception("Position out of bound in Block::insert(), max position = "
|
||||||
|
+ Poco::NumberFormatter::format(index_by_position.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||||
|
|
||||||
Container_t::iterator it = data.insert(index_by_position[position], elem);
|
Container_t::iterator it = data.insert(index_by_position[position], elem);
|
||||||
rebuildIndexByPosition();
|
rebuildIndexByPosition();
|
||||||
@ -63,7 +64,8 @@ void Block::insert(const ColumnWithNameAndType & elem)
|
|||||||
void Block::erase(size_t position)
|
void Block::erase(size_t position)
|
||||||
{
|
{
|
||||||
if (position >= index_by_position.size())
|
if (position >= index_by_position.size())
|
||||||
throw Exception("Position out of bound in Block::erase()", ErrorCodes::POSITION_OUT_OF_BOUND);
|
throw Exception("Position out of bound in Block::erase(), max position = "
|
||||||
|
+ Poco::NumberFormatter::format(index_by_position.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
|
||||||
|
|
||||||
Container_t::iterator it = index_by_position[position];
|
Container_t::iterator it = index_by_position[position];
|
||||||
index_by_name.erase(index_by_name.find(it->name));
|
index_by_name.erase(index_by_name.find(it->name));
|
||||||
@ -94,7 +96,8 @@ const ColumnWithNameAndType & Block::getByName(const std::string & name) const
|
|||||||
{
|
{
|
||||||
IndexByName_t::const_iterator it = index_by_name.find(name);
|
IndexByName_t::const_iterator it = index_by_name.find(name);
|
||||||
if (index_by_name.end() == it)
|
if (index_by_name.end() == it)
|
||||||
throw Exception("Not found column " + name + " in block.", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
|
||||||
|
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||||
|
|
||||||
return *it->second;
|
return *it->second;
|
||||||
}
|
}
|
||||||
@ -104,7 +107,8 @@ size_t Block::getPositionByName(const std::string & name) const
|
|||||||
{
|
{
|
||||||
IndexByName_t::const_iterator it = index_by_name.find(name);
|
IndexByName_t::const_iterator it = index_by_name.find(name);
|
||||||
if (index_by_name.end() == it)
|
if (index_by_name.end() == it)
|
||||||
throw Exception("Not found column " + name + " in block.", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
|
||||||
|
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||||
|
|
||||||
return std::distance(const_cast<Container_t &>(data).begin(), it->second);
|
return std::distance(const_cast<Container_t &>(data).begin(), it->second);
|
||||||
}
|
}
|
||||||
@ -118,10 +122,13 @@ size_t Block::rows() const
|
|||||||
size_t size = it->column->size();
|
size_t size = it->column->size();
|
||||||
|
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
throw Exception("Empty column in block.", ErrorCodes::EMPTY_COLUMN_IN_BLOCK);
|
throw Exception("Empty column " + it->name + " in block.", ErrorCodes::EMPTY_COLUMN_IN_BLOCK);
|
||||||
|
|
||||||
if (res != 0 && size != res)
|
if (res != 0 && size != res)
|
||||||
throw Exception("Sizes of columns doesn't match.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
throw Exception("Sizes of columns doesn't match: "
|
||||||
|
+ data.begin()->name + ": " + Poco::NumberFormatter::format(res)
|
||||||
|
+ ", " + it->name + ": " + Poco::NumberFormatter::format(size)
|
||||||
|
, ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
||||||
|
|
||||||
res = size;
|
res = size;
|
||||||
}
|
}
|
||||||
@ -135,4 +142,18 @@ size_t Block::columns() const
|
|||||||
return data.size();
|
return data.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::string Block::dumpNames() const
|
||||||
|
{
|
||||||
|
std::stringstream res;
|
||||||
|
for (Container_t::const_iterator it = data.begin(); it != data.end(); ++it)
|
||||||
|
{
|
||||||
|
if (it != data.begin())
|
||||||
|
res << ", ";
|
||||||
|
res << it->name;
|
||||||
|
}
|
||||||
|
return res.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -36,9 +36,11 @@ Block LimitBlockInputStream::read()
|
|||||||
return res;
|
return res;
|
||||||
|
|
||||||
/// отдать кусок блока
|
/// отдать кусок блока
|
||||||
size_t start = std::max(0, static_cast<int>(offset) + static_cast<int>(rows) - static_cast<int>(pos));
|
size_t start = std::max(0, static_cast<int>(offset) - static_cast<int>(pos) + static_cast<int>(rows));
|
||||||
size_t length = std::min(rows - start, limit + offset + rows - pos);
|
size_t length = std::min(static_cast<int>(limit), std::min(
|
||||||
|
static_cast<int>(pos) - static_cast<int>(offset),
|
||||||
|
static_cast<int>(limit) + static_cast<int>(offset) - static_cast<int>(pos) + static_cast<int>(rows)));
|
||||||
|
|
||||||
for (size_t i = 0; i < res.columns(); ++i)
|
for (size_t i = 0; i < res.columns(); ++i)
|
||||||
res.getByPosition(i).column->cut(start, length);
|
res.getByPosition(i).column->cut(start, length);
|
||||||
|
|
||||||
|
87
dbms/src/DataStreams/tests/expression_stream.cpp
Normal file
87
dbms/src/DataStreams/tests/expression_stream.cpp
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
#include <Poco/SharedPtr.h>
|
||||||
|
#include <Poco/Stopwatch.h>
|
||||||
|
|
||||||
|
#include <DB/IO/WriteBufferFromOStream.h>
|
||||||
|
|
||||||
|
#include <DB/Storages/StorageSystemNumbers.h>
|
||||||
|
|
||||||
|
#include <DB/DataStreams/LimitBlockInputStream.h>
|
||||||
|
#include <DB/DataStreams/ExpressionBlockInputStream.h>
|
||||||
|
#include <DB/DataStreams/TabSeparatedRowOutputStream.h>
|
||||||
|
#include <DB/DataStreams/copyData.h>
|
||||||
|
|
||||||
|
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
||||||
|
|
||||||
|
#include <DB/Functions/FunctionsArithmetic.h>
|
||||||
|
|
||||||
|
#include <DB/Parsers/ParserSelectQuery.h>
|
||||||
|
|
||||||
|
using Poco::SharedPtr;
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char ** argv)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
size_t n = argc == 2 ? atoi(argv[1]) : 10;
|
||||||
|
|
||||||
|
DB::StorageSystemNumbers table;
|
||||||
|
|
||||||
|
DB::Names column_names;
|
||||||
|
column_names.push_back("number");
|
||||||
|
|
||||||
|
DB::ParserSelectQuery parser;
|
||||||
|
DB::ASTPtr ast;
|
||||||
|
std::string input = "SELECT number, number + 1, number * 2, number * 2 + 1";
|
||||||
|
std::string expected;
|
||||||
|
|
||||||
|
const char * begin = input.data();
|
||||||
|
const char * end = begin + input.size();
|
||||||
|
const char * pos = begin;
|
||||||
|
|
||||||
|
if (!parser.parse(pos, end, ast, expected))
|
||||||
|
{
|
||||||
|
std::cout << "Failed at position " << (pos - begin) << ": "
|
||||||
|
<< mysqlxx::quote << input.substr(pos - begin, 10)
|
||||||
|
<< ", expected " << expected << "." << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
DB::Context context;
|
||||||
|
context.columns["number"] = new DB::DataTypeUInt64;
|
||||||
|
context.functions["plus"] = new DB::FunctionPlus;
|
||||||
|
context.functions["multiply"] = new DB::FunctionMultiply;
|
||||||
|
|
||||||
|
Poco::SharedPtr<DB::Expression> expression = new DB::Expression(ast, context);
|
||||||
|
|
||||||
|
Poco::SharedPtr<DB::IBlockInputStream> in1(table.read(column_names, 0));
|
||||||
|
|
||||||
|
Poco::SharedPtr<DB::ExpressionBlockInputStream> in2 = new DB::ExpressionBlockInputStream(in1, expression);
|
||||||
|
DB::LimitBlockInputStream in3(in2, 10, std::max(0, static_cast<int>(n) - 10));
|
||||||
|
|
||||||
|
DB::WriteBufferFromOStream out1(std::cout);
|
||||||
|
DB::TabSeparatedRowOutputStream out2(out1, new DB::DataTypes(expression->getReturnTypes()));
|
||||||
|
|
||||||
|
{
|
||||||
|
Poco::Stopwatch stopwatch;
|
||||||
|
stopwatch.start();
|
||||||
|
|
||||||
|
DB::copyData(in3, out2);
|
||||||
|
|
||||||
|
stopwatch.stop();
|
||||||
|
std::cout << std::fixed << std::setprecision(2)
|
||||||
|
<< "Elapsed " << stopwatch.elapsed() / 1000000.0 << " sec."
|
||||||
|
<< ", " << n * 1000000 / stopwatch.elapsed() << " rows/sec."
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (const DB::Exception & e)
|
||||||
|
{
|
||||||
|
std::cerr << e.what() << ", " << e.message() << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -136,7 +136,7 @@ void Expression::executeImpl(ASTPtr ast, Block & block)
|
|||||||
|
|
||||||
if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
|
if (ASTFunction * node = dynamic_cast<ASTFunction *>(&*ast))
|
||||||
{
|
{
|
||||||
std::cerr << node->getTreeID() << std::endl;
|
//std::cerr << node->getTreeID() << std::endl;
|
||||||
|
|
||||||
/// Вставляем в блок столбцы - результаты вычисления функции
|
/// Вставляем в блок столбцы - результаты вычисления функции
|
||||||
ColumnNumbers argument_numbers;
|
ColumnNumbers argument_numbers;
|
||||||
@ -170,7 +170,7 @@ void Expression::executeImpl(ASTPtr ast, Block & block)
|
|||||||
}
|
}
|
||||||
else if (ASTLiteral * node = dynamic_cast<ASTLiteral *>(&*ast))
|
else if (ASTLiteral * node = dynamic_cast<ASTLiteral *>(&*ast))
|
||||||
{
|
{
|
||||||
std::cerr << node->getTreeID() << std::endl;
|
//std::cerr << node->getTreeID() << std::endl;
|
||||||
|
|
||||||
/// Вставляем в блок столбец - константу
|
/// Вставляем в блок столбец - константу
|
||||||
|
|
||||||
@ -214,5 +214,33 @@ void Expression::collectFinalColumns(ASTPtr ast, Block & src, Block & dst)
|
|||||||
collectFinalColumns(*it, src, dst);
|
collectFinalColumns(*it, src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Get the list of types of the result columns.
  * The types are collected by getReturnTypesImpl, starting from the root of the stored AST.
  */
DataTypes Expression::getReturnTypes()
{
    DataTypes types;
    getReturnTypesImpl(ast, types);
    return types;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Append to res the types of the elements of the expression lists found in the tree.
  * For an ASTExpressionList node each direct child contributes:
  *  an identifier - its type, a function - all of its return types, a literal - its type;
  *  children of any other kind contribute nothing, and the list is not descended into further.
  * For any other node the search continues recursively in all of its children.
  */
void Expression::getReturnTypesImpl(ASTPtr ast, DataTypes & res)
{
    ASTExpressionList * list = dynamic_cast<ASTExpressionList *>(&*ast);

    if (!list)
    {
        /// Not an expression list: look for expression lists deeper in the tree.
        for (ASTs::iterator child = ast->children.begin(); child != ast->children.end(); ++child)
            getReturnTypesImpl(*child, res);
        return;
    }

    for (ASTs::iterator elem = list->children.begin(); elem != list->children.end(); ++elem)
    {
        if (ASTIdentifier * identifier = dynamic_cast<ASTIdentifier *>(&**elem))
        {
            res.push_back(identifier->type);
        }
        else if (ASTFunction * function = dynamic_cast<ASTFunction *>(&**elem))
        {
            /// A function may produce several columns.
            res.insert(res.end(), function->return_types.begin(), function->return_types.end());
        }
        else if (ASTLiteral * literal = dynamic_cast<ASTLiteral *>(&**elem))
        {
            res.push_back(literal->type);
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -21,9 +21,8 @@ NumbersBlockInputStream::NumbersBlockInputStream(size_t block_size_) : block_siz
|
|||||||
Block NumbersBlockInputStream::read()
|
Block NumbersBlockInputStream::read()
|
||||||
{
|
{
|
||||||
Block res;
|
Block res;
|
||||||
res.insert(ColumnWithNameAndType());
|
|
||||||
|
ColumnWithNameAndType column_with_name_and_type;
|
||||||
ColumnWithNameAndType & column_with_name_and_type = res.getByPosition(0);
|
|
||||||
|
|
||||||
column_with_name_and_type.name = "number";
|
column_with_name_and_type.name = "number";
|
||||||
column_with_name_and_type.type = new DataTypeUInt64();
|
column_with_name_and_type.type = new DataTypeUInt64();
|
||||||
@ -33,6 +32,8 @@ Block NumbersBlockInputStream::read()
|
|||||||
|
|
||||||
for (size_t i = 0; i < block_size; ++i)
|
for (size_t i = 0; i < block_size; ++i)
|
||||||
vec[i] = next++;
|
vec[i] = next++;
|
||||||
|
|
||||||
|
res.insert(column_with_name_and_type);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user