Added limited support for expressions in INSERT ... VALUES [#METR-20048].

This commit is contained in:
Alexey Milovidov 2016-02-13 09:37:19 +03:00
parent 0f6ad5450f
commit 12363ed329
24 changed files with 437 additions and 301 deletions

View File

@ -305,6 +305,8 @@ add_library (dbms
include/DB/Interpreters/LogicalExpressionsOptimizer.h
include/DB/Interpreters/InterserverIOHandler.h
include/DB/Interpreters/evaluateMissingDefaults.h
include/DB/Interpreters/evaluateConstantExpression.h
include/DB/Interpreters/convertFieldToType.h
include/DB/Interpreters/ExpressionActions.h
include/DB/Interpreters/InterpreterRenameQuery.h
include/DB/Interpreters/Join.h
@ -745,6 +747,8 @@ add_library (dbms
src/Interpreters/loadMetadata.cpp
src/Interpreters/sortBlock.cpp
src/Interpreters/evaluateMissingDefaults.cpp
src/Interpreters/evaluateConstantExpression.cpp
src/Interpreters/convertFieldToType.cpp
src/Interpreters/reinterpretAsIdentifier.cpp
src/Interpreters/Set.cpp
src/Interpreters/Join.cpp

View File

@ -3,7 +3,6 @@
#include <boost/program_options.hpp>
#include <boost/algorithm/string.hpp>
#include <DB/DataStreams/AsynchronousBlockInputStream.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataTypes/DataTypeFactory.h>
#include <DB/Interpreters/Context.h>
#include <DB/IO/copyData.h>
@ -60,7 +59,7 @@ public:
{
initReadBuffer();
initSampleBlock(context);
ExternalTableData res = std::make_pair(new AsynchronousBlockInputStream(context.getFormatFactory().getInput(
ExternalTableData res = std::make_pair(new AsynchronousBlockInputStream(context.getInputFormat(
format, *read_buffer, sample_block, DEFAULT_BLOCK_SIZE)), name);
return res;
}

View File

@ -7,6 +7,8 @@
namespace DB
{
class Context;
/** Позволяет создать IBlockInputStream или IBlockOutputStream по названию формата.
* Замечание: формат и сжатие - независимые вещи.
*/
@ -14,10 +16,10 @@ class FormatFactory
{
public:
BlockInputStreamPtr getInput(const String & name, ReadBuffer & buf,
const Block & sample, size_t max_block_size) const;
const Block & sample, const Context & context, size_t max_block_size) const;
BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf,
const Block & sample) const;
const Block & sample, const Context & context) const;
};
}

View File

@ -1,7 +1,5 @@
#pragma once
#include <Poco/SharedPtr.h>
#include <DB/Core/Block.h>
#include <DB/IO/ReadBuffer.h>
#include <DB/DataStreams/IRowInputStream.h>
@ -10,7 +8,7 @@
namespace DB
{
using Poco::SharedPtr;
class Context;
/** Поток для чтения данных в формате VALUES (как в INSERT запросе).
@ -18,7 +16,7 @@ using Poco::SharedPtr;
class ValuesRowInputStream : public IRowInputStream
{
public:
ValuesRowInputStream(ReadBuffer & istr_, const Block & sample_);
ValuesRowInputStream(ReadBuffer & istr_, const Block & sample_, const Context & context_);
bool read(Row & row) override;
@ -26,6 +24,7 @@ private:
ReadBuffer & istr;
const Block sample;
DataTypes data_types;
const Context & context;
};
}

View File

@ -5,7 +5,6 @@
#include <DB/Dictionaries/IDictionarySource.h>
#include <DB/Dictionaries/OwningBufferBlockInputStream.h>
#include <DB/IO/ReadBufferFromFile.h>
#include <DB/DataStreams/FormatFactory.h>
#include <Poco/Timestamp.h>
#include <Poco/File.h>
@ -32,7 +31,7 @@ public:
BlockInputStreamPtr loadAll() override
{
auto in_ptr = std::make_unique<ReadBufferFromFile>(filename);
auto stream = context.getFormatFactory().getInput(
auto stream = context.getInputFormat(
format, *in_ptr, sample_block, max_block_size);
last_modification = getLastModification();

View File

@ -25,7 +25,6 @@ class ContextShared;
class QuotaForIntervals;
class TableFunctionFactory;
class AggregateFunctionFactory;
class FormatFactory;
class Dictionaries;
class ExternalDictionaries;
class InterserverIOHandler;
@ -186,12 +185,15 @@ public:
const TableFunctionFactory & getTableFunctionFactory() const;
const AggregateFunctionFactory & getAggregateFunctionFactory() const;
const FormatFactory & getFormatFactory() const;
const Dictionaries & getDictionaries() const;
const ExternalDictionaries & getExternalDictionaries() const;
void tryCreateDictionaries() const;
void tryCreateExternalDictionaries() const;
/// Форматы ввода-вывода.
BlockInputStreamPtr getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, size_t max_block_size) const;
BlockOutputStreamPtr getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const;
InterserverIOHandler & getInterserverIOHandler();
/// Как другие серверы могут обратиться к этому для скачивания реплицируемых данных.

View File

@ -6,7 +6,6 @@
#include <DB/Interpreters/IInterpreter.h>
#include <DB/DataStreams/OneBlockInputStream.h>
#include <DB/DataStreams/BlockIO.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/copyData.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>

View File

@ -0,0 +1,18 @@
#pragma once
#include <DB/Core/Field.h>
namespace DB
{
class IAST;
class Context;
/** Evaluate a constant expression.
* Used in rare cases - for an element of a set in IN, for the data of an INSERT.
* Quite suboptimal.
*/
Field evaluateConstantExpression(SharedPtr<IAST> & node, const Context & context);
}

View File

@ -527,6 +527,9 @@ private:
{
const char * pos = begin;
ASTPtr ast = parseQuery(pos, end);
if (!ast)
return true;
ASTInsertQuery * insert = typeid_cast<ASTInsertQuery *>(&*ast);
if (insert && insert->data)
@ -753,7 +756,7 @@ private:
if (!insert->format.empty())
current_format = insert->format;
BlockInputStreamPtr block_input = context.getFormatFactory().getInput(
BlockInputStreamPtr block_input = context.getInputFormat(
current_format, buf, sample, insert_format_max_block_size);
BlockInputStreamPtr async_block_input = new AsynchronousBlockInputStream(block_input);
@ -911,7 +914,7 @@ private:
if (has_vertical_output_suffix)
current_format = "Vertical";
block_std_out = context.getFormatFactory().getOutput(current_format, std_out, block);
block_std_out = context.getOutputFormat(current_format, std_out, block);
block_std_out->writePrefix();
}
}

View File

@ -324,6 +324,7 @@ namespace ErrorCodes
extern const int INVALID_CONFIG_PARAMETER = 318;
extern const int UNKNOWN_STATUS_OF_INSERT = 319;
extern const int DUPLICATE_SHARD_PATHS = 320;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE = 321;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -36,7 +36,7 @@ namespace ErrorCodes
BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf,
const Block & sample, size_t max_block_size) const
const Block & sample, const Context & context, size_t max_block_size) const
{
if (name == "Native")
return new NativeBlockInputStream(buf);
@ -49,7 +49,7 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu
else if (name == "TabSeparatedWithNamesAndTypes")
return new BlockInputStreamFromRowInputStream(new TabSeparatedRowInputStream(buf, sample, true, true), sample, max_block_size);
else if (name == "Values")
return new BlockInputStreamFromRowInputStream(new ValuesRowInputStream(buf, sample), sample, max_block_size);
return new BlockInputStreamFromRowInputStream(new ValuesRowInputStream(buf, sample, context), sample, max_block_size);
else if (name == "CSV")
return new BlockInputStreamFromRowInputStream(new CSVRowInputStream(buf, sample, ','), sample, max_block_size);
else if (name == "CSVWithNames")
@ -77,7 +77,7 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu
BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer & buf,
const Block & sample) const
const Block & sample, const Context & context) const
{
if (name == "Native")
return new NativeBlockOutputStream(buf);

View File

@ -1,15 +1,29 @@
#include <DB/IO/ReadHelpers.h>
#include <DB/Interpreters/evaluateConstantExpression.h>
#include <DB/Interpreters/convertFieldToType.h>
#include <DB/Parsers/ExpressionListParsers.h>
#include <DB/DataStreams/ValuesRowInputStream.h>
#include <DB/Core/FieldVisitors.h>
namespace DB
{
using Poco::SharedPtr;
/// Error codes referenced in this file. They are only declared here (extern);
/// each name is listed exactly once.
namespace ErrorCodes
{
    extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
    extern const int CANNOT_PARSE_QUOTED_STRING;
    extern const int CANNOT_PARSE_DATE;
    extern const int CANNOT_PARSE_DATETIME;
    extern const int CANNOT_READ_ARRAY_FROM_TEXT;
    extern const int SYNTAX_ERROR;
    extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
}
ValuesRowInputStream::ValuesRowInputStream(ReadBuffer & istr_, const Block & sample_)
: istr(istr_), sample(sample_)
ValuesRowInputStream::ValuesRowInputStream(ReadBuffer & istr_, const Block & sample_, const Context & context_)
: istr(istr_), sample(sample_), context(context_)
{
size_t columns = sample.columns();
data_types.resize(columns);
@ -31,20 +45,84 @@ bool ValuesRowInputStream::read(Row & row)
return false;
}
/** Как правило, это обычный формат для потокового парсинга.
* Но в качестве исключения, поддерживается также обработка произвольных выражений вместо значений.
* Это очень неэффективно. Но если выражений нет, то оверхед отсутствует.
*/
ParserExpressionWithOptionalAlias parser(false);
assertChar('(', istr);
for (size_t i = 0; i < size; ++i)
{
if (i != 0)
assertChar(',', istr);
skipWhitespaceIfAny(istr);
skipWhitespaceIfAny(istr);
data_types[i]->deserializeTextQuoted(row[i], istr);
skipWhitespaceIfAny(istr);
char * prev_istr_position = istr.position();
size_t prev_istr_bytes = istr.count() - istr.offset();
try
{
data_types[i]->deserializeTextQuoted(row[i], istr);
skipWhitespaceIfAny(istr);
if (i != size - 1)
assertChar(',', istr);
else
assertChar(')', istr);
}
catch (const Exception & e)
{
/** Обычный потоковый парсер не смог распарсить значение.
* Попробуем распарсить его SQL-парсером как константное выражение.
* Это исключительный случай.
*/
if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED
|| e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
|| e.code() == ErrorCodes::CANNOT_PARSE_DATE
|| e.code() == ErrorCodes::CANNOT_PARSE_DATETIME
|| e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT
|| e.code() == ErrorCodes::CANNOT_PARSE_DATE)
{
/// TODO Работоспособность, если выражение не помещается целиком до конца буфера.
/// Если начало значения уже не лежит в буфере.
if (istr.count() - istr.offset() != prev_istr_bytes)
throw;
IParser::Pos pos = prev_istr_position;
Expected expected = "";
IParser::Pos max_parsed_pos = pos;
ASTPtr ast;
if (!parser.parse(pos, istr.buffer().end(), ast, max_parsed_pos, expected))
throw Exception("Cannot parse expression of type " + data_types[i]->getName() + " here: "
+ String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)),
ErrorCodes::SYNTAX_ERROR);
istr.position() = const_cast<char *>(max_parsed_pos);
row[i] = convertFieldToType(evaluateConstantExpression(ast, context), *data_types[i]);
/// TODO После добавления поддержки NULL, добавить сюда проверку на data type is nullable.
if (row[i].isNull())
throw Exception("Expression returns value " + apply_visitor(FieldVisitorToString(), row[i])
+ ", that is out of range of type " + data_types[i]->getName()
+ ", at: " + String(prev_istr_position, std::min(SHOW_CHARS_ON_SYNTAX_ERROR, istr.buffer().end() - prev_istr_position)),
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE);
skipWhitespaceIfAny(istr);
if (i != size - 1)
assertChar(',', istr);
else
assertChar(')', istr);
}
else
throw;
}
}
assertChar(')', istr);
skipWhitespaceIfAny(istr);
if (!istr.eof() && *istr.position() == ',')
++istr.position();

View File

@ -10,8 +10,8 @@
#include <DB/DataStreams/OneBlockInputStream.h>
#include <DB/DataStreams/CollapsingSortedBlockInputStream.h>
#include <DB/DataStreams/CollapsingFinalBlockInputStream.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/copyData.h>
#include <DB/Interpreters/Context.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
@ -80,9 +80,9 @@ int main(int argc, char ** argv)
//CollapsingSortedBlockInputStream collapsed(inputs, descr, "Sign", 1048576);
CollapsingFinalBlockInputStream collapsed(inputs, descr, "Sign");
FormatFactory formats;
Context context;
WriteBufferFromFileDescriptor out_buf(STDERR_FILENO);
BlockOutputStreamPtr output = formats.getOutput("TabSeparated", out_buf, block1);
BlockOutputStreamPtr output = context.getOutputFormat("TabSeparated", out_buf, block1);
copyData(collapsed, *output);
}

View File

@ -14,7 +14,6 @@
#include <DB/DataStreams/FilterBlockInputStream.h>
#include <DB/DataStreams/TabSeparatedRowOutputStream.h>
#include <DB/DataStreams/ForkBlockInputStreams.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/copyData.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
@ -93,13 +92,12 @@ int main(int argc, char ** argv)
in2 = new LimitBlockInputStream(in2, 20, 5);
Block out_sample = expression->getSampleBlock();
FormatFactory format_factory;
WriteBufferFromOStream ob1(std::cout);
WriteBufferFromOStream ob2(std::cerr);
BlockOutputStreamPtr out1 = format_factory.getOutput("TabSeparated", ob1, out_sample);
BlockOutputStreamPtr out2 = format_factory.getOutput("TabSeparated", ob2, out_sample);
BlockOutputStreamPtr out1 = context.getOutputFormat("TabSeparated", ob1, out_sample);
BlockOutputStreamPtr out2 = context.getOutputFormat("TabSeparated", ob2, out_sample);
std::thread thr1(std::bind(thread1, in1, out1, std::ref(ob1)));
std::thread thr2(std::bind(thread2, in2, out2, std::ref(ob2)));

View File

@ -8,7 +8,6 @@
#include <DB/Interpreters/loadMetadata.h>
#include <DB/Interpreters/executeQuery.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/glueBlockInputStreams.h>
@ -64,8 +63,8 @@ int main(int argc, char ** argv)
WriteBufferFromFileDescriptor wb(STDOUT_FILENO);
BlockOutputStreamPtr out1 = context.getFormatFactory().getOutput("TabSeparated", wb, io1.in_sample);
BlockOutputStreamPtr out2 = context.getFormatFactory().getOutput("TabSeparated", wb, io2.in_sample);
BlockOutputStreamPtr out1 = context.getOutputFormat("TabSeparated", wb, io1.in_sample);
BlockOutputStreamPtr out2 = context.getOutputFormat("TabSeparated", wb, io2.in_sample);
BlockInputStreams inputs;
inputs.push_back(io1.in);

View File

@ -11,7 +11,6 @@
#include <DB/DataStreams/LimitBlockInputStream.h>
#include <DB/DataStreams/UnionBlockInputStream.h>
#include <DB/DataStreams/BlockExtraInfoInputStream.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/copyData.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
@ -20,131 +19,132 @@
#include <DB/Interpreters/Context.h>
using Poco::SharedPtr;
using namespace DB;
void test1()
{
DB::StoragePtr table = DB::StorageSystemNumbers::create("numbers");
Context context;
StoragePtr table = StorageSystemNumbers::create("numbers");
DB::Names column_names;
Names column_names;
column_names.push_back("number");
DB::QueryProcessingStage::Enum stage1;
DB::QueryProcessingStage::Enum stage2;
DB::QueryProcessingStage::Enum stage3;
QueryProcessingStage::Enum stage1;
QueryProcessingStage::Enum stage2;
QueryProcessingStage::Enum stage3;
DB::BlockInputStreams streams;
streams.emplace_back(new DB::LimitBlockInputStream(table->read(column_names, 0, DB::Context{}, DB::Settings(), stage1, 1)[0], 30, 30000));
streams.emplace_back(new DB::LimitBlockInputStream(table->read(column_names, 0, DB::Context{}, DB::Settings(), stage2, 1)[0], 30, 2000));
streams.emplace_back(new DB::LimitBlockInputStream(table->read(column_names, 0, DB::Context{}, DB::Settings(), stage3, 1)[0], 30, 100));
BlockInputStreams streams;
streams.emplace_back(new LimitBlockInputStream(table->read(column_names, 0, context, Settings(), stage1, 1)[0], 30, 30000));
streams.emplace_back(new LimitBlockInputStream(table->read(column_names, 0, context, Settings(), stage2, 1)[0], 30, 2000));
streams.emplace_back(new LimitBlockInputStream(table->read(column_names, 0, context, Settings(), stage3, 1)[0], 30, 100));
DB::UnionBlockInputStream<> union_stream(streams, nullptr, 2);
UnionBlockInputStream<> union_stream(streams, nullptr, 2);
DB::FormatFactory format_factory;
DB::WriteBufferFromFileDescriptor wb(STDERR_FILENO);
DB::Block sample = table->getSampleBlock();
DB::BlockOutputStreamPtr out = format_factory.getOutput("TabSeparated", wb, sample);
WriteBufferFromFileDescriptor wb(STDERR_FILENO);
Block sample = table->getSampleBlock();
BlockOutputStreamPtr out = context.getOutputFormat("TabSeparated", wb, sample);
while (DB::Block block = union_stream.read())
while (Block block = union_stream.read())
{
out->write(block);
wb.next();
}
//DB::copyData(union_stream, *out);
//copyData(union_stream, *out);
}
void test2()
{
DB::StoragePtr table = DB::StorageSystemNumbers::create("numbers");
Context context;
StoragePtr table = StorageSystemNumbers::create("numbers");
DB::Names column_names;
Names column_names;
column_names.push_back("number");
DB::QueryProcessingStage::Enum stage1;
DB::QueryProcessingStage::Enum stage2;
DB::QueryProcessingStage::Enum stage3;
QueryProcessingStage::Enum stage1;
QueryProcessingStage::Enum stage2;
QueryProcessingStage::Enum stage3;
DB::BlockExtraInfo extra_info1;
BlockExtraInfo extra_info1;
extra_info1.host = "host1";
extra_info1.resolved_address = "127.0.0.1";
extra_info1.port = 9000;
extra_info1.user = "user1";
DB::BlockExtraInfo extra_info2;
BlockExtraInfo extra_info2;
extra_info2.host = "host2";
extra_info2.resolved_address = "127.0.0.2";
extra_info2.port = 9001;
extra_info2.user = "user2";
DB::BlockExtraInfo extra_info3;
BlockExtraInfo extra_info3;
extra_info3.host = "host3";
extra_info3.resolved_address = "127.0.0.3";
extra_info3.port = 9003;
extra_info3.user = "user3";
DB::BlockInputStreams streams;
BlockInputStreams streams;
DB::BlockInputStreamPtr stream1 = new DB::LimitBlockInputStream(table->read(column_names, 0, DB::Context{}, DB::Settings(), stage1, 1)[0], 30, 30000);
stream1 = new DB::BlockExtraInfoInputStream(stream1, extra_info1);
BlockInputStreamPtr stream1 = new LimitBlockInputStream(table->read(column_names, 0, context, Settings(), stage1, 1)[0], 30, 30000);
stream1 = new BlockExtraInfoInputStream(stream1, extra_info1);
streams.emplace_back(stream1);
DB::BlockInputStreamPtr stream2 = new DB::LimitBlockInputStream(table->read(column_names, 0, DB::Context{}, DB::Settings(), stage2, 1)[0], 30, 2000);
stream2 = new DB::BlockExtraInfoInputStream(stream2, extra_info2);
BlockInputStreamPtr stream2 = new LimitBlockInputStream(table->read(column_names, 0, context, Settings(), stage2, 1)[0], 30, 2000);
stream2 = new BlockExtraInfoInputStream(stream2, extra_info2);
streams.emplace_back(stream2);
DB::BlockInputStreamPtr stream3 = new DB::LimitBlockInputStream(table->read(column_names, 0, DB::Context{}, DB::Settings(), stage3, 1)[0], 30, 100);
stream3 = new DB::BlockExtraInfoInputStream(stream3, extra_info3);
BlockInputStreamPtr stream3 = new LimitBlockInputStream(table->read(column_names, 0, context, Settings(), stage3, 1)[0], 30, 100);
stream3 = new BlockExtraInfoInputStream(stream3, extra_info3);
streams.emplace_back(stream3);
DB::UnionBlockInputStream<DB::StreamUnionMode::ExtraInfo> union_stream(streams, nullptr, 2);
UnionBlockInputStream<StreamUnionMode::ExtraInfo> union_stream(streams, nullptr, 2);
auto getSampleBlock = []()
{
DB::Block block;
DB::ColumnWithTypeAndName col;
Block block;
ColumnWithTypeAndName col;
col.name = "number";
col.type = new DB::DataTypeUInt64;
col.type = new DataTypeUInt64;
col.column = col.type->createColumn();
block.insert(col);
col.name = "host_name";
col.type = new DB::DataTypeString;
col.type = new DataTypeString;
col.column = col.type->createColumn();
block.insert(col);
col.name = "host_address";
col.type = new DB::DataTypeString;
col.type = new DataTypeString;
col.column = col.type->createColumn();
block.insert(col);
col.name = "port";
col.type = new DB::DataTypeUInt16;
col.type = new DataTypeUInt16;
col.column = col.type->createColumn();
block.insert(col);
col.name = "user";
col.type = new DB::DataTypeString;
col.type = new DataTypeString;
col.column = col.type->createColumn();
block.insert(col);
return block;
};
DB::FormatFactory format_factory;
DB::WriteBufferFromFileDescriptor wb(STDERR_FILENO);
DB::Block sample = getSampleBlock();
DB::BlockOutputStreamPtr out = format_factory.getOutput("TabSeparated", wb, sample);
WriteBufferFromFileDescriptor wb(STDERR_FILENO);
Block sample = getSampleBlock();
BlockOutputStreamPtr out = context.getOutputFormat("TabSeparated", wb, sample);
while (DB::Block block = union_stream.read())
while (Block block = union_stream.read())
{
const auto & col = block.getByPosition(0);
auto extra_info = union_stream.getBlockExtraInfo();
DB::ColumnPtr host_name_column = new DB::ColumnString;
DB::ColumnPtr host_address_column = new DB::ColumnString;
DB::ColumnPtr port_column = new DB::ColumnUInt16;
DB::ColumnPtr user_column = new DB::ColumnString;
ColumnPtr host_name_column = new ColumnString;
ColumnPtr host_address_column = new ColumnString;
ColumnPtr port_column = new ColumnUInt16;
ColumnPtr user_column = new ColumnString;
size_t row_count = block.rows();
for (size_t i = 0; i < row_count; ++i)
@ -155,17 +155,17 @@ void test2()
user_column->insert(extra_info.user);
}
DB::Block out_block;
out_block.insert(DB::ColumnWithTypeAndName(col.column->clone(), col.type, col.name));
out_block.insert(DB::ColumnWithTypeAndName(host_name_column, new DB::DataTypeString, "host_name"));
out_block.insert(DB::ColumnWithTypeAndName(host_address_column, new DB::DataTypeString, "host_address"));
out_block.insert(DB::ColumnWithTypeAndName(port_column, new DB::DataTypeUInt16, "port"));
out_block.insert(DB::ColumnWithTypeAndName(user_column, new DB::DataTypeString, "user"));
Block out_block;
out_block.insert(ColumnWithTypeAndName(col.column->clone(), col.type, col.name));
out_block.insert(ColumnWithTypeAndName(host_name_column, new DataTypeString, "host_name"));
out_block.insert(ColumnWithTypeAndName(host_address_column, new DataTypeString, "host_address"));
out_block.insert(ColumnWithTypeAndName(port_column, new DataTypeUInt16, "port"));
out_block.insert(ColumnWithTypeAndName(user_column, new DataTypeString, "user"));
out->write(out_block);
wb.next();
}
//DB::copyData(union_stream, *out);
//copyData(union_stream, *out);
}
int main(int argc, char ** argv)
@ -175,7 +175,7 @@ int main(int argc, char ** argv)
test1();
test2();
}
catch (const DB::Exception & e)
catch (const Exception & e)
{
std::cerr << e.what() << ", " << e.displayText() << std::endl
<< std::endl

View File

@ -11,7 +11,6 @@
#include <DB/DataStreams/LimitBlockInputStream.h>
#include <DB/DataStreams/UnionBlockInputStream.h>
#include <DB/DataStreams/AsynchronousBlockInputStream.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/copyData.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
@ -19,49 +18,46 @@
#include <DB/Interpreters/Context.h>
#include <DB/Interpreters/loadMetadata.h>
using Poco::SharedPtr;
using namespace DB;
int main(int argc, char ** argv)
try
{
try
{
DB::Context context;
DB::Settings settings = context.getSettings();
Context context;
Settings settings = context.getSettings();
context.setPath("./");
context.setPath("./");
DB::loadMetadata(context);
loadMetadata(context);
DB::Names column_names;
column_names.push_back("WatchID");
Names column_names;
column_names.push_back("WatchID");
DB::StoragePtr table = context.getTable("default", "hits6");
StoragePtr table = context.getTable("default", "hits6");
DB::QueryProcessingStage::Enum stage;
DB::BlockInputStreams streams = table->read(column_names, nullptr, context, settings, stage, settings.max_block_size, settings.max_threads);
QueryProcessingStage::Enum stage;
BlockInputStreams streams = table->read(column_names, nullptr, context, settings, stage, settings.max_block_size, settings.max_threads);
for (size_t i = 0, size = streams.size(); i < size; ++i)
streams[i] = new DB::AsynchronousBlockInputStream(streams[i]);
for (size_t i = 0, size = streams.size(); i < size; ++i)
streams[i] = new AsynchronousBlockInputStream(streams[i]);
DB::BlockInputStreamPtr stream = new DB::UnionBlockInputStream<>(streams, nullptr, settings.max_threads);
stream = new DB::LimitBlockInputStream(stream, 10, 0);
BlockInputStreamPtr stream = new UnionBlockInputStream<>(streams, nullptr, settings.max_threads);
stream = new LimitBlockInputStream(stream, 10, 0);
DB::FormatFactory format_factory;
DB::WriteBufferFromFileDescriptor wb(STDERR_FILENO);
DB::Block sample = table->getSampleBlock();
DB::BlockOutputStreamPtr out = format_factory.getOutput("TabSeparated", wb, sample);
WriteBufferFromFileDescriptor wb(STDERR_FILENO);
Block sample = table->getSampleBlock();
BlockOutputStreamPtr out = context.getOutputFormat("TabSeparated", wb, sample);
DB::copyData(*stream, *out);
}
catch (const DB::Exception & e)
{
std::cerr << e.what() << ", " << e.displayText() << std::endl
<< std::endl
<< "Stack trace:" << std::endl
<< e.getStackTrace().toString();
return 1;
}
copyData(*stream, *out);
return 0;
}
catch (const Exception & e)
{
std::cerr << e.what() << ", " << e.displayText() << std::endl
<< std::endl
<< "Stack trace:" << std::endl
<< e.getStackTrace().toString();
return 1;
}

View File

@ -177,7 +177,6 @@ Context::~Context() = default;
const TableFunctionFactory & Context::getTableFunctionFactory() const { return shared->table_function_factory; }
const AggregateFunctionFactory & Context::getAggregateFunctionFactory() const { return shared->aggregate_function_factory; }
const FormatFactory & Context::getFormatFactory() const { return shared->format_factory; }
InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; }
Poco::Mutex & Context::getMutex() const { return shared->mutex; }
const Databases & Context::getDatabases() const { return shared->databases; }
@ -984,6 +983,17 @@ const MergeTreeSettings & Context::getMergeTreeSettings()
}
/// Creates an input stream for the format `name` by delegating to the shared
/// format factory; this Context is passed along to the factory.
BlockInputStreamPtr Context::getInputFormat(const String & name, ReadBuffer & buf, const Block & sample, size_t max_block_size) const
{
    const auto & factory = shared->format_factory;
    return factory.getInput(name, buf, sample, *this, max_block_size);
}
/// Creates an output stream for the format `name` by delegating to the shared
/// format factory; this Context is passed along to the factory.
BlockOutputStreamPtr Context::getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample) const
{
    const auto & factory = shared->format_factory;
    return factory.getOutput(name, buf, sample, *this);
}
void Context::shutdown()
{
shared->shutdown();

View File

@ -5,7 +5,6 @@
#include <DB/DataStreams/AddingDefaultBlockOutputStream.h>
#include <DB/DataStreams/PushingToViewsBlockOutputStream.h>
#include <DB/DataStreams/NullAndDoCopyBlockInputStream.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/copyData.h>
#include <DB/Parsers/ASTInsertQuery.h>

View File

@ -16,7 +16,6 @@
#include <DB/DataStreams/copyData.h>
#include <DB/DataStreams/CreatingSetsBlockInputStream.h>
#include <DB/DataStreams/MaterializingBlockInputStream.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/ConcatBlockInputStream.h>
#include <DB/Parsers/ASTSelectQuery.h>

View File

@ -8,21 +8,15 @@
#include <DB/DataStreams/IProfilingBlockInputStream.h>
#include <DB/DataStreams/OneBlockInputStream.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Interpreters/Set.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Interpreters/ExpressionActions.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypeFixedString.h>
#include <DB/DataTypes/DataTypeDate.h>
#include <DB/DataTypes/DataTypeDateTime.h>
#include <DB/DataTypes/DataTypeEnum.h>
#include <DB/Interpreters/convertFieldToType.h>
#include <DB/Interpreters/evaluateConstantExpression.h>
namespace DB
@ -34,7 +28,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int SET_SIZE_LIMIT_EXCEEDED;
extern const int TYPE_MISMATCH;
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_ELEMENT_OF_SET;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
}
@ -244,166 +237,12 @@ bool Set::insertFromBlock(const Block & block, bool create_ordered_set)
}
/** Checks whether the Field `from`, holding a value of type From, fits into the range of type To.
  * From and To are numeric types; they may be floating-point.
  * From is one of UInt64, Int64, Float64,
  * whereas To may also be 8-, 16- or 32-bit.
  *
  * If the value fits, `from` is converted to a Field of the type nearest to To.
  * If it does not fit, Field(Null) is returned.
  */
template <typename From, typename To>
static Field convertNumericTypeImpl(const Field & from)
{
    const From source = from.get<From>();

    /// The value fits only if converting it to To leaves it unchanged (both sides compared as long double).
    const bool fits = static_cast<long double>(source) == static_cast<long double>(To(source));
    if (fits)
        return Field(typename NearestFieldType<To>::Type(source));

    return {};
}
/** Converts the numeric Field `from` to the numeric type To, choosing the implementation
  * by the type actually stored in the Field (UInt64, Int64 or Float64).
  * Throws TYPE_MISMATCH when the Field holds anything else; `type` is used only for the message.
  */
template <typename To>
static Field convertNumericType(const Field & from, const IDataType & type)
{
    const auto stored_type = from.getType();

    if (stored_type == Field::Types::UInt64)
        return convertNumericTypeImpl<UInt64, To>(from);
    else if (stored_type == Field::Types::Int64)
        return convertNumericTypeImpl<Int64, To>(from);
    else if (stored_type == Field::Types::Float64)
        return convertNumericTypeImpl<Float64, To>(from);

    throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
        + Field::Types::toString(stored_type) + " at right", ErrorCodes::TYPE_MISMATCH);
}
/** Makes expressions like 1.0 IN (1) work correctly, and makes 1 IN (1, 2.0, 2.5, -1) behave the same as 1 IN (1, 2).
  * Checks type compatibility, checks that the value fits into the range of the type, and converts the type.
  * If the value is out of range, returns Null.
  */
static Field convertToType(const Field & src, const IDataType & type)
{
    const auto src_type = src.getType();

    /// Non-numeric target: no conversion is done, only a compatibility check.
    if (!type.isNumeric())
    {
        const bool string_into_non_string = src_type == Field::Types::String
            && !typeid_cast<const DataTypeString *>(&type)
            && !typeid_cast<const DataTypeFixedString *>(&type);

        const bool array_into_non_array = src_type == Field::Types::Array
            && !typeid_cast<const DataTypeArray *>(&type);

        if (src_type == Field::Types::UInt64
            || src_type == Field::Types::Int64
            || src_type == Field::Types::Float64
            || src_type == Field::Types::Null
            || string_into_non_string
            || array_into_non_array)
        {
            throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
                + Field::Types::toString(src_type) + " at right", ErrorCodes::TYPE_MISMATCH);
        }

        return src;
    }

    /// Plain numeric targets: range-checked conversion.
    if (typeid_cast<const DataTypeUInt8 *>(&type))
        return convertNumericType<UInt8>(src, type);
    if (typeid_cast<const DataTypeUInt16 *>(&type))
        return convertNumericType<UInt16>(src, type);
    if (typeid_cast<const DataTypeUInt32 *>(&type))
        return convertNumericType<UInt32>(src, type);
    if (typeid_cast<const DataTypeUInt64 *>(&type))
        return convertNumericType<UInt64>(src, type);
    if (typeid_cast<const DataTypeInt8 *>(&type))
        return convertNumericType<Int8>(src, type);
    if (typeid_cast<const DataTypeInt16 *>(&type))
        return convertNumericType<Int16>(src, type);
    if (typeid_cast<const DataTypeInt32 *>(&type))
        return convertNumericType<Int32>(src, type);
    if (typeid_cast<const DataTypeInt64 *>(&type))
        return convertNumericType<Int64>(src, type);
    if (typeid_cast<const DataTypeFloat32 *>(&type))
        return convertNumericType<Float32>(src, type);
    if (typeid_cast<const DataTypeFloat64 *>(&type))
        return convertNumericType<Float64>(src, type);

    /// Remaining numeric targets: Date, DateTime, Enum8, Enum16. At most one flag is set.
    const bool is_date = typeid_cast<const DataTypeDate *>(&type) != nullptr;
    const bool is_datetime = !is_date
        && typeid_cast<const DataTypeDateTime *>(&type) != nullptr;
    const bool is_enum8 = !is_date && !is_datetime
        && typeid_cast<const DataTypeEnum8 *>(&type) != nullptr;
    const bool is_enum16 = !is_date && !is_datetime && !is_enum8
        && typeid_cast<const DataTypeEnum16 *>(&type) != nullptr;

    if (!is_date && !is_datetime && !is_enum8 && !is_enum16)
        throw Exception{
            "Logical error: unknown numeric type " + type.getName(),
            ErrorCodes::LOGICAL_ERROR
        };

    /// Numeric values for Enums should not be used directly in IN section
    if (src_type == Field::Types::UInt64 && !is_enum8 && !is_enum16)
        return src;

    if (src_type == Field::Types::String)
    {
        /// Allows comparing dates and date-times with a string.
        const String & str = src.get<const String &>();
        ReadBufferFromString in(str);

        if (is_date)
        {
            DayNum_t parsed_date{};
            readDateText(parsed_date, in);
            if (!in.eof())
                throw Exception("String is too long for Date: " + str);

            return Field(UInt64(parsed_date));
        }

        if (is_datetime)
        {
            time_t parsed_date_time{};
            readDateTimeText(parsed_date_time, in);
            if (!in.eof())
                throw Exception("String is too long for DateTime: " + str);

            return Field(UInt64(parsed_date_time));
        }

        if (is_enum8)
            return Field(UInt64(static_cast<const DataTypeEnum8 &>(type).getValue(str)));

        if (is_enum16)
            return Field(UInt64(static_cast<const DataTypeEnum16 &>(type).getValue(str)));
    }

    throw Exception("Type mismatch in IN section: " + type.getName() + " at left, "
        + Field::Types::toString(src_type) + " at right", ErrorCodes::TYPE_MISMATCH);
}
/** Evaluates a constant expression (for an element of a set in IN). Quite suboptimal.
  * Throws LOGICAL_ERROR if the evaluated block is empty, and BAD_ARGUMENTS if the
  * result column is missing from the block or is not constant.
  */
static Field evaluateConstantExpression(ASTPtr & node, const Context & context)
{
    ExpressionActionsPtr folding_actions = ExpressionAnalyzer(
        node,
        context,
        nullptr,
        NamesAndTypesList{{ "_dummy", new DataTypeUInt8 }}).getConstActions();

    /// The block must contain at least one column so that its number of rows is known.
    Block constants_block{{ new ColumnConstUInt8(1, 0), new DataTypeUInt8, "_dummy" }};
    folding_actions->execute(constants_block);

    if (!constants_block || constants_block.rows() == 0)
        throw Exception("Logical error: empty block after evaluation constant expression for IN", ErrorCodes::LOGICAL_ERROR);

    const String result_name = node->getColumnName();

    if (!constants_block.has(result_name))
        throw Exception("Element of set in IN is not a constant expression: " + result_name, ErrorCodes::BAD_ARGUMENTS);

    const IColumn & evaluated = *constants_block.getByName(result_name).column;

    if (!evaluated.isConst())
        throw Exception("Element of set in IN is not a constant expression: " + result_name, ErrorCodes::BAD_ARGUMENTS);

    return evaluated[0];
}
/** Turn one element of a set (an AST node) into a Field converted to `type`.
  *
  * - A literal node: its value is converted directly.
  * - A function node: it is first evaluated as a constant expression, then converted.
  * - Anything else: throws INCORRECT_ELEMENT_OF_SET.
  *
  * NOTE(review): this span appears to come from a rendered diff — each branch shows two
  * consecutive `return` statements (one calling convertToType, one calling convertFieldToType).
  * Presumably only one of each pair exists in the real source file; confirm against the repository.
  */
static Field extractValueFromNode(ASTPtr & node, const IDataType & type, const Context & context)
{
/// Literal: convert the stored value as is.
if (ASTLiteral * lit = typeid_cast<ASTLiteral *>(node.get()))
return convertToType(lit->value, type);
return convertFieldToType(lit->value, type);
/// Function call: evaluate it as a constant expression before converting.
else if (typeid_cast<ASTFunction *>(node.get()))
return convertToType(evaluateConstantExpression(node, context), type);
return convertFieldToType(evaluateConstantExpression(node, context), type);
else
throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);
}

View File

@ -0,0 +1,146 @@
#include <DB/IO/ReadBufferFromString.h>
#include <DB/IO/ReadHelpers.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypeFixedString.h>
#include <DB/DataTypes/DataTypeDate.h>
#include <DB/DataTypes/DataTypeDateTime.h>
#include <DB/DataTypes/DataTypeEnum.h>
#include <DB/Interpreters/convertFieldToType.h>
namespace DB
{
/// Error codes thrown by the conversion functions in this file; declared `extern`, so defined elsewhere.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int TYPE_MISMATCH;
}
/** Check whether the Field `from`, holding a value of type From, fits into the range of type To.
  * From and To are numeric types; either may be floating-point.
  * From is one of UInt64, Int64, Float64,
  *  whereas To may also be an 8-, 16- or 32-bit type.
  *
  * If the value fits, `from` is converted to a Field of the type nearest to To.
  * If it does not fit, Field(Null) is returned.
  *
  * The check is a round trip: the value is cast to To and both the original and the cast value
  * are compared as long double.
  *
  * NOTE(review): when From is floating-point and To is an integer type, casting a value outside
  * To's range is undefined behaviour per the C++ standard — consider an explicit range check.
  */
template <typename From, typename To>
static Field convertNumericTypeImpl(const Field & from)
{
    const From original = from.get<From>();
    const To narrowed = To(original);

    const bool survives_round_trip =
        static_cast<long double>(original) == static_cast<long double>(narrowed);

    if (!survives_round_trip)
        return Field();

    return Field(typename NearestFieldType<To>::Type(original));
}
/** Convert a numeric Field to the numeric type To, dispatching on the type actually stored in `from`.
  *
  * UInt64, Int64 and Float64 fields are passed to convertNumericTypeImpl<From, To>.
  * A Field of any other type throws TYPE_MISMATCH; `type` is used only to name the expected
  * data type in that message.
  */
template <typename To>
static Field convertNumericType(const Field & from, const IDataType & type)
{
    const auto stored_kind = from.getType();

    switch (stored_kind)
    {
        case Field::Types::UInt64:
            return convertNumericTypeImpl<UInt64, To>(from);
        case Field::Types::Int64:
            return convertNumericTypeImpl<Int64, To>(from);
        case Field::Types::Float64:
            return convertNumericTypeImpl<Float64, To>(from);
        default:
            break;
    }

    throw Exception("Type mismatch in IN or VALUES section: " + type.getName() + " expected, "
        + Field::Types::toString(stored_kind) + " got", ErrorCodes::TYPE_MISMATCH);
}
/** Convert the value `src` into a Field usable for a column of data type `type`.
  *
  * - Fixed-width integer and floating-point types: delegated to convertNumericType<T>.
  * - Date / DateTime: a UInt64 is returned unchanged; a String is parsed as date / date-time text
  *   and must be consumed entirely, otherwise a "String is too long" exception is thrown.
  * - Enum8 / Enum16: only a String is accepted; it is mapped to a number with the enum's getValue().
  * - Non-numeric types: a String is accepted only for String / FixedString and an Array only for Array;
  *   UInt64, Int64, Float64 and Null are rejected; an accepted value is returned unchanged.
  *
  * Throws TYPE_MISMATCH when the value cannot be used for `type`, and LOGICAL_ERROR when `type`
  * reports itself as numeric but is none of the types handled here.
  */
Field convertFieldToType(const Field & src, const IDataType & type)
{
    const auto src_kind = src.getType();

    if (!type.isNumeric())
    {
        /// Non-numeric target: decide whether the stored kind is compatible, then pass the value through.
        bool mismatch = false;
        switch (src_kind)
        {
            case Field::Types::UInt64:
            case Field::Types::Int64:
            case Field::Types::Float64:
            case Field::Types::Null:
                mismatch = true;
                break;

            case Field::Types::String:
                mismatch = !typeid_cast<const DataTypeString *>(&type)
                    && !typeid_cast<const DataTypeFixedString *>(&type);
                break;

            case Field::Types::Array:
                mismatch = !typeid_cast<const DataTypeArray *>(&type);
                break;

            default:
                break;
        }

        if (mismatch)
            throw Exception("Type mismatch in IN or VALUES section: " + type.getName() + " expected, "
                + Field::Types::toString(src_kind) + " got", ErrorCodes::TYPE_MISMATCH);

        return src;
    }

    /// Plain fixed-width numbers.
    if (typeid_cast<const DataTypeUInt8 *>(&type))
        return convertNumericType<UInt8>(src, type);
    if (typeid_cast<const DataTypeUInt16 *>(&type))
        return convertNumericType<UInt16>(src, type);
    if (typeid_cast<const DataTypeUInt32 *>(&type))
        return convertNumericType<UInt32>(src, type);
    if (typeid_cast<const DataTypeUInt64 *>(&type))
        return convertNumericType<UInt64>(src, type);
    if (typeid_cast<const DataTypeInt8 *>(&type))
        return convertNumericType<Int8>(src, type);
    if (typeid_cast<const DataTypeInt16 *>(&type))
        return convertNumericType<Int16>(src, type);
    if (typeid_cast<const DataTypeInt32 *>(&type))
        return convertNumericType<Int32>(src, type);
    if (typeid_cast<const DataTypeInt64 *>(&type))
        return convertNumericType<Int64>(src, type);
    if (typeid_cast<const DataTypeFloat32 *>(&type))
        return convertNumericType<Float32>(src, type);
    if (typeid_cast<const DataTypeFloat64 *>(&type))
        return convertNumericType<Float64>(src, type);

    /// The remaining numeric types, probed in a fixed order; each probe runs only if the previous ones failed.
    const bool is_date = typeid_cast<const DataTypeDate *>(&type) != nullptr;
    const bool is_datetime = !is_date
        && typeid_cast<const DataTypeDateTime *>(&type) != nullptr;
    const bool is_enum8 = !is_date && !is_datetime
        && typeid_cast<const DataTypeEnum8 *>(&type) != nullptr;
    const bool is_enum16 = !is_date && !is_datetime && !is_enum8
        && typeid_cast<const DataTypeEnum16 *>(&type) != nullptr;
    const bool is_enum = is_enum8 || is_enum16;

    if (!is_date && !is_datetime && !is_enum)
        throw Exception{
            "Logical error: unknown numeric type " + type.getName(),
            ErrorCodes::LOGICAL_ERROR
        };

    /// Numeric values for Enums should not be used directly in IN section
    if (src_kind == Field::Types::UInt64 && !is_enum)
        return src;

    if (src_kind == Field::Types::String)
    {
        /// Allows dates and dates-with-time to be compared with a string.
        const String & text = src.get<const String &>();
        ReadBufferFromString in(text);

        if (is_date)
        {
            DayNum_t day{};
            readDateText(day, in);
            if (!in.eof())
                throw Exception("String is too long for Date: " + text);

            return Field(UInt64(day));
        }

        if (is_datetime)
        {
            time_t timestamp{};
            readDateTimeText(timestamp, in);
            if (!in.eof())
                throw Exception("String is too long for DateTime: " + text);

            return Field(UInt64(timestamp));
        }

        if (is_enum8)
            return Field(UInt64(static_cast<const DataTypeEnum8 &>(type).getValue(text)));

        if (is_enum16)
            return Field(UInt64(static_cast<const DataTypeEnum16 &>(type).getValue(text)));
    }

    throw Exception("Type mismatch in IN or VALUES section: " + type.getName() + " expected, "
        + Field::Types::toString(src_kind) + " got", ErrorCodes::TYPE_MISMATCH);
}
}

View File

@ -0,0 +1,47 @@
#include <DB/Core/Block.h>
#include <DB/Columns/ColumnConst.h>
#include <DB/Parsers/IAST.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/Interpreters/Context.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Interpreters/ExpressionActions.h>
#include <DB/Interpreters/evaluateConstantExpression.h>
namespace DB
{
/// Error codes thrown by evaluateConstantExpression; declared `extern`, so defined elsewhere.
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
/** Evaluate a constant expression (for an element of the set in IN). Far from optimal.
  *
  * The expression is executed on a one-row dummy block and the first value of the column named
  * after `node` is returned.
  * Throws LOGICAL_ERROR if the block is empty after execution, and BAD_ARGUMENTS if the
  * result column is absent or is not a constant column (the messages mention IN or VALUES).
  */
Field evaluateConstantExpression(ASTPtr & node, const Context & context)
{
    ExpressionActionsPtr const_actions = ExpressionAnalyzer(
        node, context, nullptr, NamesAndTypesList{{ "_dummy", new DataTypeUInt8 }}).getConstActions();

    /// The block must contain at least one column so that its number of rows is known.
    Block evaluated_block{{ new ColumnConstUInt8(1, 0), new DataTypeUInt8, "_dummy" }};
    const_actions->execute(evaluated_block);

    if (!evaluated_block || evaluated_block.rows() == 0)
        throw Exception("Logical error: empty block after evaluation constant expression for IN or VALUES", ErrorCodes::LOGICAL_ERROR);

    const String result_name = node->getColumnName();

    /// Both failure cases below raise the same exception, so it is built in one place.
    const auto not_constant = [&]
    {
        return Exception("Element of set in IN or VALUES is not a constant expression: " + result_name, ErrorCodes::BAD_ARGUMENTS);
    };

    if (!evaluated_block.has(result_name))
        throw not_constant();

    const auto & result_entry = evaluated_block.getByName(result_name);
    if (!result_entry.column->isConst())
        throw not_constant();

    return (*result_entry.column)[0];
}
}

View File

@ -4,7 +4,6 @@
#include <DB/IO/ConcatReadBuffer.h>
#include <DB/DataStreams/BlockIO.h>
#include <DB/DataStreams/FormatFactory.h>
#include <DB/DataStreams/copyData.h>
#include <DB/DataStreams/IProfilingBlockInputStream.h>
@ -392,7 +391,7 @@ void executeQuery(
ConcatReadBuffer data_istr(buffers);
BlockInputStreamPtr in{
context.getFormatFactory().getInput(
context.getInputFormat(
format, data_istr, streams.out_sample, context.getSettings().max_insert_block_size)};
copyData(*in, *streams.out);
@ -406,7 +405,7 @@ void executeQuery(
? typeid_cast<const ASTIdentifier &>(*ast_query_with_output->getFormat()).name
: context.getDefaultFormat();
BlockOutputStreamPtr out = context.getFormatFactory().getOutput(format_name, ostr, streams.in_sample);
BlockOutputStreamPtr out = context.getOutputFormat(format_name, ostr, streams.in_sample);
if (set_content_type)
set_content_type(out->getContentType());