2012-08-23 23:49:28 +00:00
|
|
|
|
#include <iomanip>
|
|
|
|
|
|
2012-08-23 20:22:44 +00:00
|
|
|
|
#include <DB/Core/Field.h>
|
|
|
|
|
|
|
|
|
|
#include <DB/Columns/ColumnString.h>
|
|
|
|
|
#include <DB/Columns/ColumnFixedString.h>
|
|
|
|
|
#include <DB/Columns/ColumnsNumber.h>
|
|
|
|
|
|
2012-08-23 23:49:28 +00:00
|
|
|
|
#include <DB/DataStreams/IProfilingBlockInputStream.h>
|
2012-08-24 19:42:03 +00:00
|
|
|
|
#include <DB/DataStreams/OneBlockInputStream.h>
|
|
|
|
|
|
|
|
|
|
#include <DB/Parsers/ASTExpressionList.h>
|
|
|
|
|
#include <DB/Parsers/ASTFunction.h>
|
|
|
|
|
#include <DB/Parsers/ASTLiteral.h>
|
2012-08-23 23:49:28 +00:00
|
|
|
|
|
2012-08-23 20:22:44 +00:00
|
|
|
|
#include <DB/Interpreters/Set.h>
|
2013-03-19 12:25:59 +00:00
|
|
|
|
#include <DB/DataTypes/DataTypeArray.h>
|
2013-03-25 13:02:12 +00:00
|
|
|
|
#include <DB/DataTypes/DataTypesNumberFixed.h>
|
|
|
|
|
#include <DB/DataTypes/DataTypeString.h>
|
|
|
|
|
#include <DB/DataTypes/DataTypeFixedString.h>
|
2012-08-23 20:22:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2013-03-25 13:02:12 +00:00
|
|
|
|
/** Chooses which set representation to use for the given key column types.
  *
  * key_types         - data types of the key columns.
  * keys_fit_128_bits - (out) true only if every key is numeric and the field sizes add up to at most 16 bytes.
  * key_sizes         - (out) resized to key_types.size(); entry j receives getSizeOfField() of key j
  *                     for every key visited before the first non-numeric one.
  *
  * Returns KEY_64 for a single numeric key that is neither Float32 nor Float64,
  * KEY_STRING for a single String or FixedString key, and HASHED in every other case.
  */
Set::Type Set::chooseMethod(const DataTypes & key_types, bool & keys_fit_128_bits, Sizes & key_sizes)
{
    const size_t num_keys = key_types.size();

    keys_fit_128_bits = true;
    key_sizes.resize(num_keys);

    /// Accumulate the total width of the keys, giving up at the first non-numeric one.
    size_t total_bytes = 0;
    for (size_t idx = 0; idx != num_keys; ++idx)
    {
        if (!key_types[idx]->isNumeric())
        {
            keys_fit_128_bits = false;
            break;
        }

        key_sizes[idx] = key_types[idx]->getSizeOfField();
        total_bytes += key_sizes[idx];
    }

    /// 16 bytes == 128 bits.
    if (total_bytes > 16)
        keys_fit_128_bits = false;

    if (num_keys == 1)
    {
        /// A single numeric key that is not a floating point number.
        if (key_types[0]->isNumeric()
            && !dynamic_cast<const DataTypeFloat32 *>(&*key_types[0])
            && !dynamic_cast<const DataTypeFloat64 *>(&*key_types[0]))
            return KEY_64;

        /// A single string key: use the hash table keyed by the string itself.
        if (dynamic_cast<const DataTypeString *>(&*key_types[0])
            || dynamic_cast<const DataTypeFixedString *>(&*key_types[0]))
            return KEY_STRING;
    }

    /// Several keys (or an unsupported single key): build a set of hashes of the keys.
    return HASHED;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Fills the set with the rows of every block read from `stream`.
  *
  * For each block the column types are stored into data_types, the representation is chosen again
  * with chooseMethod(), and every row is inserted into the container of that representation.
  * After the stream is exhausted, profiling information is written to the log.
  *
  * Throws Exception with ILLEGAL_COLUMN if a string key comes in a column that is neither ColumnString
  * nor ColumnFixedString, and with UNKNOWN_SET_DATA_VARIANT if the chosen variant is not handled here.
  */
void Set::create(BlockInputStreamPtr stream)
{
    LOG_TRACE(log, "Creating set");
    Stopwatch watch;
    size_t entries = 0;

    /// Read all the data.
    while (Block block = stream->read())
    {
        const size_t keys_size = block.columns();
        const size_t rows = block.rows();

        Row key(keys_size);
        ConstColumnPlainPtrs key_columns(keys_size);
        data_types.resize(keys_size);

        /// Remember the columns (and their types) we are going to work with.
        for (size_t col = 0; col < keys_size; ++col)
        {
            key_columns[col] = block.getByPosition(col).column;
            data_types[col] = block.getByPosition(col).type;
        }

        /// Which data structure should be used for the set?
        keys_fit_128_bits = false;
        type = chooseMethod(data_types, keys_fit_128_bits, key_sizes);

        if (type == KEY_64)
        {
            SetUInt64 & dst = key64;
            const IColumn & column = *key_columns[0];

            for (size_t row = 0; row < rows; ++row)
                dst.insert(get<UInt64>(column[row]));

            entries = dst.size();
        }
        else if (type == KEY_STRING)
        {
            SetString & dst = key_string;
            const IColumn & column = *key_columns[0];

            if (const ColumnString * str_col = dynamic_cast<const ColumnString *>(&column))
            {
                const ColumnString::Offsets_t & offsets = str_col->getOffsets();
                const ColumnUInt8::Container_t & chars = dynamic_cast<const ColumnUInt8 &>(str_col->getData()).getData();

                for (size_t row = 0; row < rows; ++row)
                {
                    /// offsets[row] is the end of string `row`; the key length is one less than the stored length.
                    const size_t begin = row == 0 ? 0 : offsets[row - 1];
                    const size_t length = offsets[row] - begin - 1;
                    StringRef ref(&chars[begin], length);

                    SetString::iterator it;
                    bool inserted;
                    dst.emplace(ref, it, inserted);

                    /// A newly inserted key is re-pointed at a copy of the bytes made in string_pool.
                    if (inserted)
                        it->data = string_pool.insert(ref.data, ref.size);
                }
            }
            else if (const ColumnFixedString * fixed_col = dynamic_cast<const ColumnFixedString *>(&column))
            {
                const size_t n = fixed_col->getN();
                const ColumnUInt8::Container_t & chars = dynamic_cast<const ColumnUInt8 &>(fixed_col->getData()).getData();

                for (size_t row = 0; row < rows; ++row)
                {
                    /// Every value occupies exactly n bytes.
                    StringRef ref(&chars[row * n], n);

                    SetString::iterator it;
                    bool inserted;
                    dst.emplace(ref, it, inserted);

                    if (inserted)
                        it->data = string_pool.insert(ref.data, ref.size);
                }
            }
            else
                throw Exception("Illegal type of column when creating set with string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);

            entries = dst.size();
        }
        else if (type == HASHED)
        {
            SetHashed & dst = hashed;

            /// NOTE(review): this call passes `key`, unlike the pack128 calls on the lookup paths - confirm the overload.
            for (size_t row = 0; row < rows; ++row)
                dst.insert(pack128(row, keys_fit_128_bits, keys_size, key, key_columns, key_sizes));

            entries = dst.size();
        }
        else if (type == GENERIC)
        {
            /// The generic way: the key is the whole row of values.
            SetGeneric & dst = generic;

            for (size_t row = 0; row < rows; ++row)
            {
                for (size_t col = 0; col < keys_size; ++col)
                    key_columns[col]->get(row, key[col]);

                dst.insert(key);

                /// Presumably restores the size of `key` in case insert() took its contents - TODO confirm.
                key.resize(keys_size);
            }

            entries = dst.size();
        }
        else
            throw Exception("Unknown set variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
    }

    logProfileInfo(watch, *stream, entries);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Writes a debug log line describing how the set was built.
  *
  * watch   - stopwatch whose elapsed time is reported.
  * in      - stream the set was read from; queried with getLeafRowsBytes() and, if it is
  *           an IProfilingBlockInputStream, with getInfo().rows.
  * entries - number of entries in the created set.
  *
  * Nothing is logged when getLeafRowsBytes() reports zero rows.
  */
void Set::logProfileInfo(Stopwatch & watch, IBlockInputStream & in, size_t entries)
{
    /// Find out how many rows and bytes were read.
    size_t rows = 0;
    size_t bytes = 0;
    in.getLeafRowsBytes(rows, bytes);

    size_t head_rows = 0;
    if (IProfilingBlockInputStream * profiling_in = dynamic_cast<IProfilingBlockInputStream *>(&in))
        head_rows = profiling_in->getInfo().rows;

    if (rows == 0)
        return;

    /// Sample the stopwatch once, so that the time and both rates in the message agree with each other.
    const double elapsed = watch.elapsedSeconds();
    const double mib = bytes / 1048576.0;

    /// Converting an infinite value to an integer is undefined, so do not divide by a zero interval here.
    const size_t rows_per_sec = elapsed > 0 ? static_cast<size_t>(rows / elapsed) : 0;

    LOG_DEBUG(log, std::fixed << std::setprecision(3)
        << "Created set with " << entries << " entries from " << head_rows << " rows."
        << " Read " << rows << " rows, " << mib << " MiB in " << elapsed << " sec., "
        << rows_per_sec << " rows/sec., " << mib / elapsed << " MiB/sec.");
}
|
|
|
|
|
|
|
|
|
|
|
2012-08-24 19:42:03 +00:00
|
|
|
|
/** Builds the set from a list of literals given in the query.
  *
  * types - data types of the set columns; stored into data_types.
  * node  - must be an ASTExpressionList (the dynamic_cast to a reference throws otherwise).
  *         With one column every child must be a literal; with several columns every child must be
  *         a `tuple` function with exactly one literal argument per column.
  *
  * The values are collected into a temporary block with columns named "_0", "_1", ...,
  * and the set is then created from a stream over that block.
  *
  * Throws Exception with INCORRECT_ELEMENT_OF_SET if an element does not have the expected form.
  */
void Set::create(DataTypes & types, ASTPtr node)
{
    data_types = types;
    const size_t num_columns = data_types.size();

    /// Put the set into a block: one empty column per data type.
    Block block;
    for (size_t i = 0; i < num_columns; ++i)
    {
        ColumnWithNameAndType col;
        col.name = "_" + Poco::NumberFormatter::format(i);
        col.type = data_types[i];
        col.column = data_types[i]->createColumn();

        block.insert(col);
    }

    ASTExpressionList & list = dynamic_cast<ASTExpressionList &>(*node);
    for (ASTs::iterator it = list.children.begin(); it != list.children.end(); ++it)
    {
        /// NOTE: the ability to use constant expressions in sets may be implemented later.

        if (num_columns == 1)
        {
            /// Single column: the element itself is the value.
            ASTLiteral * lit = dynamic_cast<ASTLiteral *>(&**it);
            if (!lit)
                throw Exception("Incorrect element of set. Must be literal.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

            block.getByPosition(0).column->insert(lit->value);
            continue;
        }

        /// Several columns: the element must be a tuple of literals.
        ASTFunction * func = dynamic_cast<ASTFunction *>(&**it);
        if (!func)
            throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

        if (func->name != "tuple")
            throw Exception("Incorrect element of set. Must be tuple.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

        const size_t tuple_size = func->arguments->children.size();
        if (tuple_size != num_columns)
            throw Exception("Incorrect size of tuple in set.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

        for (size_t j = 0; j < tuple_size; ++j)
        {
            ASTLiteral * lit = dynamic_cast<ASTLiteral *>(&*func->arguments->children[j]);
            if (!lit)
                throw Exception("Incorrect element of tuple in set. Must be literal.", ErrorCodes::INCORRECT_ELEMENT_OF_SET);

            block.getByPosition(j).column->insert(lit->value);
        }
    }

    create(new OneBlockInputStream(block));
}
|
|
|
|
|
|
|
|
|
|
|
2012-08-23 20:22:44 +00:00
|
|
|
|
/** Checks, for every row of the argument columns, whether the key belongs to the set.
  *
  * block     - block holding the argument columns; the column at position `result` is replaced
  *             with a new ColumnUInt8 holding one value per row of the first argument.
  * arguments - positions of the key columns in the block; must not be empty.
  * result    - position of the result column in the block.
  * negative  - if true, the membership test is inverted.
  *
  * If the first argument has an Array type, a row is checked element by element
  * (see executeArray / executeConstArray); otherwise the argument columns together form the key.
  *
  * Throws Exception with NUMBER_OF_COLUMNS_DOESNT_MATCH or TYPE_MISMATCH if the arguments do not
  * correspond to the columns of the set, and with ILLEGAL_COLUMN for an unexpected array column.
  */
void Set::execute(Block & block, const ColumnNumbers & arguments, size_t result, bool negative) const
{
    /// arguments[0] is read unconditionally below, so an empty list must be rejected first.
    if (arguments.empty())
        throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);

    ColumnUInt8 * c_res = new ColumnUInt8;
    block.getByPosition(result).column = c_res;
    ColumnUInt8::Container_t & vec_res = c_res->getData();
    vec_res.resize(block.getByPosition(arguments[0]).column->size());

    /// If the set is empty, nothing is in it: the answer is the same for every row.
    if (data_types.empty())
    {
        /// Fill explicitly instead of relying on what resize() left in the buffer,
        /// and do not take the address of the first element of an empty container.
        if (!vec_res.empty())
            memset(&vec_res[0], negative ? 1 : 0, vec_res.size());
        return;
    }

    DataTypeArray * array_type = dynamic_cast<DataTypeArray *>(&*block.getByPosition(arguments[0]).type);

    if (array_type)
    {
        if (data_types.size() != 1 || arguments.size() != 1)
            throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
        if (array_type->getNestedType()->getName() != data_types[0]->getName())
            throw Exception("Types in section IN don't match.", ErrorCodes::TYPE_MISMATCH);

        IColumn * in_column = &*block.getByPosition(arguments[0]).column;
        if (ColumnConstArray * col = dynamic_cast<ColumnConstArray *>(in_column))
            executeConstArray(col, vec_res, negative);
        else if (ColumnArray * col = dynamic_cast<ColumnArray *>(in_column))
            executeArray(col, vec_res, negative);
        else
            throw Exception("Unexpeced array column type: " + in_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
    }
    else
    {
        if (data_types.size() != arguments.size())
            throw Exception("Number of columns in section IN doesn't match.", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);

        /// Remember the columns we are going to work with. Also check that the data types are right.
        ConstColumnPlainPtrs key_columns(arguments.size());
        for (size_t i = 0; i < arguments.size(); ++i)
        {
            key_columns[i] = block.getByPosition(arguments[i]).column;

            if (data_types[i]->getName() != block.getByPosition(arguments[i]).type->getName())
                throw Exception("Types in section IN don't match.", ErrorCodes::TYPE_MISMATCH);
        }

        executeOrdinary(key_columns, vec_res, negative);
    }
}
|
2012-08-23 22:40:51 +00:00
|
|
|
|
|
2013-03-19 12:25:59 +00:00
|
|
|
|
void Set::executeOrdinary(const ConstColumnPlainPtrs & key_columns, ColumnUInt8::Container_t & vec_res, bool negative) const
|
|
|
|
|
{
|
|
|
|
|
size_t keys_size = data_types.size();
|
|
|
|
|
size_t rows = key_columns[0]->size();
|
|
|
|
|
Row key(keys_size);
|
2012-08-23 20:35:05 +00:00
|
|
|
|
|
|
|
|
|
if (type == KEY_64)
|
|
|
|
|
{
|
2012-08-23 22:27:10 +00:00
|
|
|
|
const SetUInt64 & set = key64;
|
2013-01-08 19:41:22 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
2013-03-19 12:25:59 +00:00
|
|
|
|
|
2012-08-23 20:35:05 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
2013-02-08 20:34:30 +00:00
|
|
|
|
UInt64 key = get<UInt64>(column[i]);
|
2012-08-23 20:35:05 +00:00
|
|
|
|
vec_res[i] = negative ^ (set.end() != set.find(key));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (type == KEY_STRING)
|
|
|
|
|
{
|
2012-08-23 22:27:10 +00:00
|
|
|
|
const SetString & set = key_string;
|
2013-01-08 19:41:22 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
2013-03-19 12:25:59 +00:00
|
|
|
|
|
2012-08-23 20:35:05 +00:00
|
|
|
|
if (const ColumnString * column_string = dynamic_cast<const ColumnString *>(&column))
|
|
|
|
|
{
|
|
|
|
|
const ColumnString::Offsets_t & offsets = column_string->getOffsets();
|
|
|
|
|
const ColumnUInt8::Container_t & data = dynamic_cast<const ColumnUInt8 &>(column_string->getData()).getData();
|
2013-03-19 12:25:59 +00:00
|
|
|
|
|
2012-08-23 20:35:05 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
StringRef ref(&data[i == 0 ? 0 : offsets[i - 1]], (i == 0 ? offsets[i] : (offsets[i] - offsets[i - 1])) - 1);
|
|
|
|
|
vec_res[i] = negative ^ (set.end() != set.find(ref));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (const ColumnFixedString * column_string = dynamic_cast<const ColumnFixedString *>(&column))
|
|
|
|
|
{
|
|
|
|
|
size_t n = column_string->getN();
|
|
|
|
|
const ColumnUInt8::Container_t & data = dynamic_cast<const ColumnUInt8 &>(column_string->getData()).getData();
|
2013-03-19 12:25:59 +00:00
|
|
|
|
|
2012-08-23 20:35:05 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
StringRef ref(&data[i * n], n);
|
|
|
|
|
vec_res[i] = negative ^ (set.end() != set.find(ref));
|
|
|
|
|
}
|
|
|
|
|
}
|
2012-08-24 19:42:03 +00:00
|
|
|
|
else if (const ColumnConstString * column_string = dynamic_cast<const ColumnConstString *>(&column))
|
|
|
|
|
{
|
|
|
|
|
bool res = negative ^ (set.end() != set.find(StringRef(column_string->getData())));
|
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
vec_res[i] = res;
|
|
|
|
|
}
|
2012-08-23 20:35:05 +00:00
|
|
|
|
else
|
|
|
|
|
throw Exception("Illegal type of column when creating set with string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
}
|
|
|
|
|
else if (type == HASHED)
|
|
|
|
|
{
|
2012-08-23 22:27:10 +00:00
|
|
|
|
const SetHashed & set = hashed;
|
2013-03-19 12:25:59 +00:00
|
|
|
|
|
2012-08-23 20:35:05 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2013-02-17 20:19:05 +00:00
|
|
|
|
vec_res[i] = negative ^ (set.end() != set.find(pack128(i, keys_fit_128_bits, keys_size, key_columns, key_sizes)));
|
2012-08-23 20:35:05 +00:00
|
|
|
|
}
|
|
|
|
|
else if (type == GENERIC)
|
|
|
|
|
{
|
|
|
|
|
/// Общий способ
|
2012-08-23 22:27:10 +00:00
|
|
|
|
const SetGeneric & set = generic;
|
2013-03-19 12:25:59 +00:00
|
|
|
|
|
2012-08-23 20:35:05 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
for (size_t j = 0; j < keys_size; ++j)
|
2013-01-07 06:47:15 +00:00
|
|
|
|
key_columns[j]->get(i, key[j]);
|
2013-03-19 12:25:59 +00:00
|
|
|
|
|
2012-08-23 20:35:05 +00:00
|
|
|
|
vec_res[i] = negative ^ (set.end() != set.find(key));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Unknown set variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
2012-08-23 20:22:44 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-03-25 13:02:12 +00:00
|
|
|
|
void Set::executeArray(const ColumnArray * key_column, ColumnUInt8::Container_t & vec_res, bool negative) const
|
|
|
|
|
{
|
|
|
|
|
size_t rows = key_column->size();
|
|
|
|
|
const ColumnArray::Offsets_t & offsets = key_column->getOffsets();
|
|
|
|
|
const IColumn & nested_column = key_column->getData();
|
|
|
|
|
|
|
|
|
|
if (type == KEY_64)
|
|
|
|
|
{
|
|
|
|
|
const SetUInt64 & set = key64;
|
|
|
|
|
|
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
UInt8 res = 0;
|
|
|
|
|
/// Для всех элементов
|
|
|
|
|
for (size_t j = prev_offset; j < offsets[i] && !res; ++j)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
UInt64 key = get<UInt64>(nested_column[j]);
|
|
|
|
|
res |= negative ^ (set.end() != set.find(key));
|
|
|
|
|
}
|
|
|
|
|
vec_res[i] = res;
|
|
|
|
|
prev_offset = offsets[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (type == KEY_STRING)
|
|
|
|
|
{
|
|
|
|
|
const SetString & set = key_string;
|
|
|
|
|
|
|
|
|
|
if (const ColumnString * column_string = dynamic_cast<const ColumnString *>(&nested_column))
|
|
|
|
|
{
|
|
|
|
|
const ColumnString::Offsets_t & nested_offsets = column_string->getOffsets();
|
|
|
|
|
const ColumnUInt8::Container_t & data = dynamic_cast<const ColumnUInt8 &>(column_string->getData()).getData();
|
|
|
|
|
|
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
UInt8 res = 0;
|
|
|
|
|
/// Для всех элементов
|
|
|
|
|
for (size_t j = prev_offset; j < offsets[i]; ++j)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
size_t begin = j == 0 ? 0 : nested_offsets[j - 1];
|
|
|
|
|
size_t end = nested_offsets[j];
|
|
|
|
|
StringRef ref(&data[begin], end - begin - 1);
|
|
|
|
|
res |= negative ^ (set.end() != set.find(ref));
|
|
|
|
|
}
|
|
|
|
|
vec_res[i] = res;
|
|
|
|
|
prev_offset = offsets[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (const ColumnFixedString * column_string = dynamic_cast<const ColumnFixedString *>(&nested_column))
|
|
|
|
|
{
|
|
|
|
|
size_t n = column_string->getN();
|
|
|
|
|
const ColumnUInt8::Container_t & data = dynamic_cast<const ColumnUInt8 &>(column_string->getData()).getData();
|
|
|
|
|
|
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
UInt8 res = 0;
|
|
|
|
|
/// Для всех элементов
|
|
|
|
|
for (size_t j = prev_offset; j < offsets[i]; ++j)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
StringRef ref(&data[j * n], n);
|
|
|
|
|
res |= negative ^ (set.end() != set.find(ref));
|
|
|
|
|
}
|
|
|
|
|
vec_res[i] = res;
|
|
|
|
|
prev_offset = offsets[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Illegal type of column when looking for Array(String) key: " + nested_column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
}
|
|
|
|
|
else if (type == HASHED)
|
|
|
|
|
{
|
|
|
|
|
const SetHashed & set = hashed;
|
|
|
|
|
ConstColumnPlainPtrs nested_columns(1, &nested_column);
|
|
|
|
|
|
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
UInt8 res = 0;
|
|
|
|
|
/// Для всех элементов
|
|
|
|
|
for (size_t j = prev_offset; j < offsets[i]; ++j)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
res |= negative ^ (set.end() != set.find(pack128(j, keys_fit_128_bits, 1, nested_columns, key_sizes)));
|
|
|
|
|
}
|
|
|
|
|
vec_res[i] = res;
|
|
|
|
|
prev_offset = offsets[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (type == GENERIC)
|
|
|
|
|
{
|
|
|
|
|
/// Общий способ
|
|
|
|
|
const SetGeneric & set = generic;
|
|
|
|
|
|
|
|
|
|
Row key(1);
|
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
UInt8 res = 0;
|
|
|
|
|
/// Для всех элементов
|
|
|
|
|
for (size_t j = prev_offset; j < offsets[i]; ++j)
|
|
|
|
|
{
|
|
|
|
|
nested_column.get(j, key[0]);
|
|
|
|
|
res |= negative ^ (set.end() != set.find(key));
|
|
|
|
|
}
|
|
|
|
|
vec_res[i] = res;
|
|
|
|
|
prev_offset = offsets[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Unknown set variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Set::executeConstArray(const ColumnConstArray * key_column, ColumnUInt8::Container_t & vec_res, bool negative) const
|
2013-03-19 12:25:59 +00:00
|
|
|
|
{
|
2013-03-25 13:02:12 +00:00
|
|
|
|
if (type == HASHED)
|
|
|
|
|
{
|
|
|
|
|
ColumnPtr full_column = key_column->convertToFullColumn();
|
|
|
|
|
executeArray(dynamic_cast<ColumnArray *>(&*full_column), vec_res, negative);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t rows = key_column->size();
|
|
|
|
|
Array values = key_column->getData();
|
|
|
|
|
UInt8 res = 0;
|
|
|
|
|
|
|
|
|
|
/// Для всех элементов
|
|
|
|
|
for (size_t j = 0; j < values.size() && !res; ++j)
|
|
|
|
|
{
|
|
|
|
|
if (type == KEY_64)
|
|
|
|
|
{
|
|
|
|
|
const SetUInt64 & set = key64;
|
|
|
|
|
UInt64 key = get<UInt64>(values[j]);
|
|
|
|
|
res |= negative ^ (set.end() != set.find(key));
|
|
|
|
|
}
|
|
|
|
|
else if (type == KEY_STRING)
|
|
|
|
|
{
|
|
|
|
|
const SetString & set = key_string;
|
|
|
|
|
res |= negative ^ (set.end() != set.find(StringRef(get<String>(values[j]))));
|
|
|
|
|
}
|
|
|
|
|
else if (type == GENERIC)
|
|
|
|
|
{
|
|
|
|
|
const SetGeneric & set = generic;
|
|
|
|
|
Row key(1, values[j]);
|
|
|
|
|
res |= negative ^ (set.end() != set.find(key));
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Unknown set variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
|
|
|
|
}
|
2013-03-19 12:25:59 +00:00
|
|
|
|
}
|
|
|
|
|
|
2012-08-23 20:22:44 +00:00
|
|
|
|
}
|