2014-06-12 02:31:30 +00:00
|
|
|
|
#include <DB/Columns/ColumnString.h>
|
|
|
|
|
#include <DB/Columns/ColumnFixedString.h>
|
|
|
|
|
|
2014-06-17 17:07:22 +00:00
|
|
|
|
#include <DB/Parsers/ASTJoin.h>
|
2014-06-12 02:31:30 +00:00
|
|
|
|
#include <DB/Interpreters/Join.h>
|
2015-04-17 08:46:06 +00:00
|
|
|
|
#include <DB/DataStreams/IProfilingBlockInputStream.h>
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
|
2015-03-02 01:10:58 +00:00
|
|
|
|
/** Chooses which hash table variant to use for the given key columns.
  *
  * Outputs:
  *  - key_sizes is resized to the number of keys and filled with sizeOfField() of each key,
  *    up to (not including) the first key that is not fixed-width;
  *  - keys_fit_128_bits is set to true only if every key is fixed-width and
  *    the widths sum to at most 16 bytes.
  *
  * Returns KEY_64 for a single numeric key, KEY_STRING for a single string key
  * (ColumnString, ColumnConstString, or a ColumnFixedString that does not fit in 128 bits),
  * and HASHED in every other case.
  */
Join::Type Join::chooseMethod(const ConstColumnPlainPtrs & key_columns, bool & keys_fit_128_bits, Sizes & key_sizes)
{
    const size_t keys_size = key_columns.size();

    keys_fit_128_bits = true;
    key_sizes.resize(keys_size);

    /// Accumulate the total width of the keys; give up at the first non-fixed-width key.
    size_t total_bytes = 0;
    for (size_t idx = 0; idx < keys_size; ++idx)
    {
        if (key_columns[idx]->isFixed())
        {
            key_sizes[idx] = key_columns[idx]->sizeOfField();
            total_bytes += key_sizes[idx];
            continue;
        }

        keys_fit_128_bits = false;
        break;
    }

    if (total_bytes > 16)
        keys_fit_128_bits = false;

    /// With several keys (or none) we build a hash table over hashes / packed values of the keys.
    if (keys_size != 1)
        return Type::HASHED;

    const IColumn * only_key = key_columns[0];

    /// A single numeric key that fits in 64 bits.
    if (only_key->isNumeric())
        return Type::KEY_64;

    /// A single string key: use a hash table keyed by the string itself.
    const bool is_plain_string = typeid_cast<const ColumnString *>(only_key)
        || typeid_cast<const ColumnConstString *>(only_key);

    if (is_plain_string
        || (typeid_cast<const ColumnFixedString *>(only_key) && !keys_fit_128_bits))
        return Type::KEY_STRING;

    return Type::HASHED;
}
|
|
|
|
|
|
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
template <typename Maps>
|
2015-03-02 01:10:58 +00:00
|
|
|
|
static void initImpl(Maps & maps, Join::Type type)
|
2014-06-18 20:08:31 +00:00
|
|
|
|
{
|
|
|
|
|
switch (type)
|
|
|
|
|
{
|
2015-03-02 01:10:58 +00:00
|
|
|
|
case Join::Type::EMPTY: break;
|
|
|
|
|
case Join::Type::KEY_64: maps.key64 .reset(new typename Maps::MapUInt64); break;
|
|
|
|
|
case Join::Type::KEY_STRING: maps.key_string .reset(new typename Maps::MapString); break;
|
|
|
|
|
case Join::Type::HASHED: maps.hashed .reset(new typename Maps::MapHashed); break;
|
2014-06-18 20:08:31 +00:00
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Sums size() over those hash tables of `maps` that have been allocated.
template <typename Maps>
static size_t getTotalRowCountImpl(const Maps & maps)
{
    const size_t in_key64 = maps.key64 ? maps.key64->size() : 0;
    const size_t in_key_string = maps.key_string ? maps.key_string->size() : 0;
    const size_t in_hashed = maps.hashed ? maps.hashed->size() : 0;

    return in_key64 + in_key_string + in_hashed;
}
|
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
/// Sums getBufferSizeInBytes() over those hash tables of `maps` that have been allocated.
template <typename Maps>
static size_t getTotalByteCountImpl(const Maps & maps)
{
    const size_t in_key64 = maps.key64 ? maps.key64->getBufferSizeInBytes() : 0;
    const size_t in_key_string = maps.key_string ? maps.key_string->getBufferSizeInBytes() : 0;
    const size_t in_hashed = maps.hashed ? maps.hashed->getBufferSizeInBytes() : 0;

    return in_key64 + in_key_string + in_hashed;
}
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
/// Нужно ли использовать хэш-таблицы maps_*_full, в которых запоминается, была ли строчка присоединена.
|
|
|
|
|
static bool getFullness(ASTJoin::Kind kind)
|
|
|
|
|
{
|
|
|
|
|
return kind == ASTJoin::Right || kind == ASTJoin::Full;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-03-02 01:10:58 +00:00
|
|
|
|
/** Remembers the chosen hash table variant and allocates the matching table
  * in the set of maps selected by the join kind (full or not) and strictness (ANY or ALL).
  */
void Join::init(Type type_)
{
    type = type_;

    const bool use_full_maps = getFullness(kind);
    const bool is_any = (strictness == ASTJoin::Any);

    if (use_full_maps)
    {
        if (is_any)
            initImpl(maps_any_full, type);
        else
            initImpl(maps_all_full, type);
    }
    else
    {
        if (is_any)
            initImpl(maps_any, type);
        else
            initImpl(maps_all, type);
    }
}
|
|
|
|
|
|
|
|
|
|
size_t Join::getTotalRowCount() const
|
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
size_t res = 0;
|
|
|
|
|
res += getTotalRowCountImpl(maps_any);
|
|
|
|
|
res += getTotalRowCountImpl(maps_all);
|
|
|
|
|
res += getTotalRowCountImpl(maps_any_full);
|
|
|
|
|
res += getTotalRowCountImpl(maps_all_full);
|
|
|
|
|
return res;
|
2014-06-18 20:08:31 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t Join::getTotalByteCount() const
|
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
size_t res = 0;
|
|
|
|
|
res += getTotalByteCountImpl(maps_any);
|
|
|
|
|
res += getTotalByteCountImpl(maps_all);
|
|
|
|
|
res += getTotalByteCountImpl(maps_any_full);
|
|
|
|
|
res += getTotalByteCountImpl(maps_all_full);
|
|
|
|
|
res += pool.size();
|
|
|
|
|
return res;
|
2014-06-18 20:08:31 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2014-06-12 02:31:30 +00:00
|
|
|
|
bool Join::checkSizeLimits() const
|
|
|
|
|
{
|
|
|
|
|
if (max_rows && getTotalRowCount() > max_rows)
|
|
|
|
|
return false;
|
|
|
|
|
if (max_bytes && getTotalByteCount() > max_bytes)
|
|
|
|
|
return false;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
/// Вставка элемента в хэш-таблицу вида ключ -> ссылка на строку, которая затем будет использоваться при JOIN-е.
|
2014-06-18 19:14:29 +00:00
|
|
|
|
template <ASTJoin::Strictness STRICTNESS, typename Map>
|
|
|
|
|
struct Inserter
|
|
|
|
|
{
|
|
|
|
|
static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename Map>
|
|
|
|
|
struct Inserter<ASTJoin::Any, Map>
|
|
|
|
|
{
|
|
|
|
|
static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool)
|
|
|
|
|
{
|
|
|
|
|
typename Map::iterator it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
map.emplace(key, it, inserted);
|
|
|
|
|
|
|
|
|
|
if (inserted)
|
2015-04-17 08:46:06 +00:00
|
|
|
|
new (&it->second) typename Map::mapped_type(stored_block, i);
|
2014-06-18 19:14:29 +00:00
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
/// Для строковых ключей отличается тем, что саму строчку надо разместить в пуле.
|
2015-04-17 08:46:06 +00:00
|
|
|
|
template <typename Map>
|
|
|
|
|
struct InserterAnyString
|
2014-06-18 19:14:29 +00:00
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool)
|
2014-06-18 19:14:29 +00:00
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
typename Map::iterator it;
|
2014-06-18 19:14:29 +00:00
|
|
|
|
bool inserted;
|
|
|
|
|
map.emplace(key, it, inserted);
|
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
|
|
|
|
it->first.data = pool.insert(key.data, key.size);
|
2015-04-17 08:46:06 +00:00
|
|
|
|
new (&it->second) typename Map::mapped_type(stored_block, i);
|
2014-06-18 19:14:29 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
template <> struct Inserter<ASTJoin::Any, Join::MapsAny::MapString> : InserterAnyString<Join::MapsAny::MapString> {};
|
|
|
|
|
template <> struct Inserter<ASTJoin::Any, Join::MapsAnyFull::MapString> : InserterAnyString<Join::MapsAnyFull::MapString> {};
|
|
|
|
|
|
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
template <typename Map>
|
|
|
|
|
struct Inserter<ASTJoin::All, Map>
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
2014-06-18 20:08:31 +00:00
|
|
|
|
static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool)
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
2014-06-18 20:08:31 +00:00
|
|
|
|
typename Map::iterator it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
map.emplace(key, it, inserted);
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
if (inserted)
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
new (&it->second) typename Map::mapped_type(stored_block, i);
|
2014-06-18 20:08:31 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/** Первый элемент списка хранится в значении хэш-таблицы, остальные - в pool-е.
|
|
|
|
|
* Мы будем вставлять каждый раз элемент на место второго.
|
|
|
|
|
* То есть, бывший второй элемент, если он был, станет третьим, и т. п.
|
|
|
|
|
*/
|
2015-04-17 08:46:06 +00:00
|
|
|
|
auto elem = reinterpret_cast<typename Map::mapped_type *>(pool.alloc(sizeof(typename Map::mapped_type)));
|
2014-06-18 20:08:31 +00:00
|
|
|
|
|
|
|
|
|
elem->next = it->second.next;
|
2014-06-19 19:00:58 +00:00
|
|
|
|
it->second.next = elem;
|
2014-06-18 20:08:31 +00:00
|
|
|
|
elem->block = stored_block;
|
|
|
|
|
elem->row_num = i;
|
2014-06-12 02:31:30 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-18 20:08:31 +00:00
|
|
|
|
};
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
template <typename Map>
|
|
|
|
|
struct InserterAllString
|
2014-06-18 20:08:31 +00:00
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
static void insert(Map & map, const typename Map::key_type & key, Block * stored_block, size_t i, Arena & pool)
|
2014-06-18 20:08:31 +00:00
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
typename Map::iterator it;
|
2014-06-18 20:08:31 +00:00
|
|
|
|
bool inserted;
|
|
|
|
|
map.emplace(key, it, inserted);
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
|
|
|
|
it->first.data = pool.insert(key.data, key.size);
|
2015-04-17 08:46:06 +00:00
|
|
|
|
new (&it->second) typename Map::mapped_type(stored_block, i);
|
2014-06-18 20:08:31 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
auto elem = reinterpret_cast<typename Map::mapped_type *>(pool.alloc(sizeof(typename Map::mapped_type)));
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
elem->next = it->second.next;
|
2014-06-19 19:00:58 +00:00
|
|
|
|
it->second.next = elem;
|
2014-06-18 20:08:31 +00:00
|
|
|
|
elem->block = stored_block;
|
|
|
|
|
elem->row_num = i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
template <> struct Inserter<ASTJoin::All, Join::MapsAll::MapString> : InserterAllString<Join::MapsAll::MapString> {};
|
|
|
|
|
template <> struct Inserter<ASTJoin::All, Join::MapsAllFull::MapString> : InserterAllString<Join::MapsAllFull::MapString> {};
|
|
|
|
|
|
2014-06-13 02:05:05 +00:00
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
template <ASTJoin::Strictness STRICTNESS, typename Maps>
|
|
|
|
|
void Join::insertFromBlockImpl(Maps & maps, size_t rows, const ConstColumnPlainPtrs & key_columns, size_t keys_size, Block * stored_block)
|
|
|
|
|
{
|
2015-03-02 01:10:58 +00:00
|
|
|
|
if (type == Type::KEY_64)
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
2014-06-18 20:08:31 +00:00
|
|
|
|
typedef typename Maps::MapUInt64 Map;
|
|
|
|
|
Map & res = *maps.key64;
|
2014-06-12 02:31:30 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
UInt64 key = column.get64(i);
|
2014-06-18 20:08:31 +00:00
|
|
|
|
Inserter<STRICTNESS, Map>::insert(res, key, stored_block, i, pool);
|
2014-06-12 02:31:30 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2015-03-02 01:10:58 +00:00
|
|
|
|
else if (type == Type::KEY_STRING)
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
2014-06-18 20:08:31 +00:00
|
|
|
|
typedef typename Maps::MapString Map;
|
|
|
|
|
Map & res = *maps.key_string;
|
2014-06-12 02:31:30 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (const ColumnString * column_string = typeid_cast<const ColumnString *>(&column))
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
|
|
|
|
const ColumnString::Offsets_t & offsets = column_string->getOffsets();
|
|
|
|
|
const ColumnString::Chars_t & data = column_string->getChars();
|
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
2014-06-18 19:14:29 +00:00
|
|
|
|
StringRef key(&data[i == 0 ? 0 : offsets[i - 1]], (i == 0 ? offsets[i] : (offsets[i] - offsets[i - 1])) - 1);
|
2014-06-18 20:08:31 +00:00
|
|
|
|
Inserter<STRICTNESS, Map>::insert(res, key, stored_block, i, pool);
|
2014-06-12 02:31:30 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
else if (const ColumnFixedString * column_string = typeid_cast<const ColumnFixedString *>(&column))
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
|
|
|
|
size_t n = column_string->getN();
|
|
|
|
|
const ColumnFixedString::Chars_t & data = column_string->getChars();
|
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
2014-06-18 19:14:29 +00:00
|
|
|
|
StringRef key(&data[i * n], n);
|
2014-06-18 20:08:31 +00:00
|
|
|
|
Inserter<STRICTNESS, Map>::insert(res, key, stored_block, i, pool);
|
2014-06-12 02:31:30 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
2015-02-13 01:17:44 +00:00
|
|
|
|
throw Exception("Illegal type of column when creating join with string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
2014-06-12 02:31:30 +00:00
|
|
|
|
}
|
2015-03-02 01:10:58 +00:00
|
|
|
|
else if (type == Type::HASHED)
|
2014-06-12 02:31:30 +00:00
|
|
|
|
{
|
2014-06-18 20:08:31 +00:00
|
|
|
|
typedef typename Maps::MapHashed Map;
|
|
|
|
|
Map & res = *maps.hashed;
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
UInt128 key = keys_fit_128_bits
|
2015-02-22 05:53:16 +00:00
|
|
|
|
? packFixed<UInt128>(i, keys_size, key_columns, key_sizes)
|
2014-06-12 02:31:30 +00:00
|
|
|
|
: hash128(i, keys_size, key_columns);
|
|
|
|
|
|
2014-06-18 20:08:31 +00:00
|
|
|
|
Inserter<STRICTNESS, Map>::insert(res, key, stored_block, i, pool);
|
2014-06-12 02:31:30 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Unknown JOIN variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
2014-06-18 20:08:31 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-05-26 00:37:48 +00:00
|
|
|
|
void Join::setSampleBlock(const Block & block)
|
2014-06-18 20:08:31 +00:00
|
|
|
|
{
|
2015-01-27 21:24:24 +00:00
|
|
|
|
Poco::ScopedWriteRWLock lock(rwlock);
|
|
|
|
|
|
2015-05-26 00:37:48 +00:00
|
|
|
|
if (!empty())
|
|
|
|
|
return;
|
|
|
|
|
|
2014-07-02 20:23:48 +00:00
|
|
|
|
size_t keys_size = key_names_right.size();
|
2014-06-18 20:08:31 +00:00
|
|
|
|
ConstColumnPlainPtrs key_columns(keys_size);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
2015-01-29 03:07:58 +00:00
|
|
|
|
key_columns[i] = block.getByName(key_names_right[i]).column;
|
2014-06-18 20:08:31 +00:00
|
|
|
|
|
2015-05-26 00:37:48 +00:00
|
|
|
|
/// Выберем, какую структуру данных для множества использовать.
|
|
|
|
|
init(chooseMethod(key_columns, keys_fit_128_bits, key_sizes));
|
|
|
|
|
|
2015-06-09 23:50:22 +00:00
|
|
|
|
sample_block_with_columns_to_add = block;
|
2015-05-26 00:37:48 +00:00
|
|
|
|
|
2015-06-09 23:50:22 +00:00
|
|
|
|
/// Удаляем из sample_block_with_columns_to_add ключевые столбцы.
|
2015-05-26 00:37:48 +00:00
|
|
|
|
for (const auto & name : key_names_right)
|
2015-06-09 23:50:22 +00:00
|
|
|
|
{
|
|
|
|
|
size_t pos = sample_block_with_columns_to_add.getPositionByName(name);
|
|
|
|
|
sample_block_with_keys.insert(sample_block_with_columns_to_add.unsafeGetByPosition(pos));
|
|
|
|
|
sample_block_with_columns_to_add.erase(pos);
|
|
|
|
|
}
|
2015-05-26 00:37:48 +00:00
|
|
|
|
|
2015-06-09 23:50:22 +00:00
|
|
|
|
for (size_t i = 0, size = sample_block_with_columns_to_add.columns(); i < size; ++i)
|
2015-05-26 00:37:48 +00:00
|
|
|
|
{
|
2015-06-09 23:50:22 +00:00
|
|
|
|
auto & column = sample_block_with_columns_to_add.unsafeGetByPosition(i);
|
2015-05-26 00:37:48 +00:00
|
|
|
|
if (!column.column)
|
|
|
|
|
column.column = column.type->createColumn();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool Join::insertFromBlock(const Block & block)
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedWriteRWLock lock(rwlock);
|
2014-06-18 20:08:31 +00:00
|
|
|
|
|
|
|
|
|
/// Какую структуру данных для множества использовать?
|
|
|
|
|
if (empty())
|
2015-05-26 00:37:48 +00:00
|
|
|
|
throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
|
|
|
|
size_t keys_size = key_names_right.size();
|
|
|
|
|
ConstColumnPlainPtrs key_columns(keys_size);
|
|
|
|
|
|
2015-06-09 20:55:15 +00:00
|
|
|
|
/// Редкий случай, когда ключи являются константами. Чтобы не поддерживать отдельный код, материализуем их.
|
|
|
|
|
Columns materialized_columns;
|
|
|
|
|
|
2015-05-26 00:37:48 +00:00
|
|
|
|
/// Запоминаем столбцы ключей, с которыми будем работать
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
2015-06-09 20:55:15 +00:00
|
|
|
|
{
|
2015-05-26 00:37:48 +00:00
|
|
|
|
key_columns[i] = block.getByName(key_names_right[i]).column;
|
|
|
|
|
|
2015-06-09 20:55:15 +00:00
|
|
|
|
if (key_columns[i]->isConst())
|
|
|
|
|
{
|
|
|
|
|
materialized_columns.emplace_back(dynamic_cast<const IColumnConst &>(*key_columns[i]).convertToFullColumn());
|
|
|
|
|
key_columns[i] = materialized_columns.back();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-05-26 00:37:48 +00:00
|
|
|
|
size_t rows = block.rows();
|
2014-06-18 20:08:31 +00:00
|
|
|
|
|
|
|
|
|
blocks.push_back(block);
|
|
|
|
|
Block * stored_block = &blocks.back();
|
|
|
|
|
|
|
|
|
|
/// Удаляем из stored_block ключевые столбцы, так как они не нужны.
|
2014-07-02 20:23:48 +00:00
|
|
|
|
for (const auto & name : key_names_right)
|
2014-06-18 20:08:31 +00:00
|
|
|
|
stored_block->erase(stored_block->getPositionByName(name));
|
|
|
|
|
|
2015-06-09 20:55:15 +00:00
|
|
|
|
/// Редкий случай, когда соединяемые столбцы являются константами. Чтобы не поддерживать отдельный код, материализуем их.
|
2015-05-26 00:37:48 +00:00
|
|
|
|
for (size_t i = 0, size = stored_block->columns(); i < size; ++i)
|
|
|
|
|
{
|
|
|
|
|
ColumnPtr col = stored_block->getByPosition(i).column;
|
|
|
|
|
if (col->isConst())
|
|
|
|
|
stored_block->getByPosition(i).column = dynamic_cast<IColumnConst &>(*col).convertToFullColumn();
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
if (!getFullness(kind))
|
|
|
|
|
{
|
|
|
|
|
if (strictness == ASTJoin::Any)
|
|
|
|
|
insertFromBlockImpl<ASTJoin::Any>(maps_any, rows, key_columns, keys_size, stored_block);
|
|
|
|
|
else
|
|
|
|
|
insertFromBlockImpl<ASTJoin::All>(maps_all, rows, key_columns, keys_size, stored_block);
|
|
|
|
|
}
|
2014-06-18 20:08:31 +00:00
|
|
|
|
else
|
2015-04-17 08:46:06 +00:00
|
|
|
|
{
|
|
|
|
|
if (strictness == ASTJoin::Any)
|
|
|
|
|
insertFromBlockImpl<ASTJoin::Any>(maps_any_full, rows, key_columns, keys_size, stored_block);
|
|
|
|
|
else
|
|
|
|
|
insertFromBlockImpl<ASTJoin::All>(maps_all_full, rows, key_columns, keys_size, stored_block);
|
|
|
|
|
}
|
2014-06-12 02:31:30 +00:00
|
|
|
|
|
|
|
|
|
if (!checkSizeLimits())
|
|
|
|
|
{
|
|
|
|
|
if (overflow_mode == OverflowMode::THROW)
|
|
|
|
|
throw Exception("Join size limit exceeded."
|
|
|
|
|
" Rows: " + toString(getTotalRowCount()) +
|
|
|
|
|
", limit: " + toString(max_rows) +
|
|
|
|
|
". Bytes: " + toString(getTotalByteCount()) +
|
|
|
|
|
", limit: " + toString(max_bytes) + ".",
|
|
|
|
|
ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
|
|
|
|
|
|
|
|
|
|
if (overflow_mode == OverflowMode::BREAK)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2014-06-19 18:15:46 +00:00
|
|
|
|
template <ASTJoin::Kind KIND, ASTJoin::Strictness STRICTNESS, typename Map>
|
2015-04-16 06:12:35 +00:00
|
|
|
|
struct Adder;
|
2014-06-17 17:07:22 +00:00
|
|
|
|
|
2014-06-19 18:15:46 +00:00
|
|
|
|
template <typename Map>
|
|
|
|
|
struct Adder<ASTJoin::Left, ASTJoin::Any, Map>
|
|
|
|
|
{
|
|
|
|
|
static void add(const Map & map, const typename Map::key_type & key, size_t num_columns_to_add, ColumnPlainPtrs & added_columns,
|
|
|
|
|
size_t i, IColumn::Filter * filter, IColumn::Offset_t & current_offset, IColumn::Offsets_t * offsets)
|
2014-06-17 17:07:22 +00:00
|
|
|
|
{
|
2014-06-19 18:15:46 +00:00
|
|
|
|
typename Map::const_iterator it = map.find(key);
|
|
|
|
|
|
|
|
|
|
if (it != map.end())
|
|
|
|
|
{
|
2015-04-17 08:46:06 +00:00
|
|
|
|
it->second.setUsed();
|
2014-06-19 18:15:46 +00:00
|
|
|
|
for (size_t j = 0; j < num_columns_to_add; ++j)
|
|
|
|
|
added_columns[j]->insertFrom(*it->second.block->unsafeGetByPosition(j).column.get(), it->second.row_num);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
for (size_t j = 0; j < num_columns_to_add; ++j)
|
|
|
|
|
added_columns[j]->insertDefault();
|
|
|
|
|
}
|
2014-06-17 17:07:22 +00:00
|
|
|
|
}
|
2014-06-19 18:15:46 +00:00
|
|
|
|
};
|
2014-06-17 17:07:22 +00:00
|
|
|
|
|
|
|
|
|
template <typename Map>
|
2014-06-19 18:15:46 +00:00
|
|
|
|
struct Adder<ASTJoin::Inner, ASTJoin::Any, Map>
|
2014-06-17 17:07:22 +00:00
|
|
|
|
{
|
2014-06-19 18:15:46 +00:00
|
|
|
|
static void add(const Map & map, const typename Map::key_type & key, size_t num_columns_to_add, ColumnPlainPtrs & added_columns,
|
|
|
|
|
size_t i, IColumn::Filter * filter, IColumn::Offset_t & current_offset, IColumn::Offsets_t * offsets)
|
2014-06-17 17:07:22 +00:00
|
|
|
|
{
|
2014-06-19 18:15:46 +00:00
|
|
|
|
typename Map::const_iterator it = map.find(key);
|
2014-06-17 17:07:22 +00:00
|
|
|
|
|
2014-06-19 18:15:46 +00:00
|
|
|
|
if (it != map.end())
|
|
|
|
|
{
|
|
|
|
|
(*filter)[i] = 1;
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
it->second.setUsed();
|
2014-06-19 18:15:46 +00:00
|
|
|
|
for (size_t j = 0; j < num_columns_to_add; ++j)
|
|
|
|
|
added_columns[j]->insertFrom(*it->second.block->unsafeGetByPosition(j).column.get(), it->second.row_num);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
(*filter)[i] = 0;
|
2014-06-17 17:07:22 +00:00
|
|
|
|
}
|
2014-06-19 18:15:46 +00:00
|
|
|
|
};
|
2014-06-18 18:31:35 +00:00
|
|
|
|
|
|
|
|
|
template <ASTJoin::Kind KIND, typename Map>
|
2014-06-19 18:15:46 +00:00
|
|
|
|
struct Adder<KIND, ASTJoin::All, Map>
|
2014-06-18 18:31:35 +00:00
|
|
|
|
{
|
2014-06-19 18:15:46 +00:00
|
|
|
|
static void add(const Map & map, const typename Map::key_type & key, size_t num_columns_to_add, ColumnPlainPtrs & added_columns,
|
|
|
|
|
size_t i, IColumn::Filter * filter, IColumn::Offset_t & current_offset, IColumn::Offsets_t * offsets)
|
|
|
|
|
{
|
|
|
|
|
typename Map::const_iterator it = map.find(key);
|
2014-06-17 17:07:22 +00:00
|
|
|
|
|
2014-06-19 18:15:46 +00:00
|
|
|
|
if (it != map.end())
|
|
|
|
|
{
|
|
|
|
|
size_t rows_joined = 0;
|
2015-04-17 08:46:06 +00:00
|
|
|
|
it->second.setUsed();
|
|
|
|
|
for (auto current = &static_cast<const typename Map::mapped_type::Base_t &>(it->second); current != nullptr; current = current->next)
|
2014-06-19 18:15:46 +00:00
|
|
|
|
{
|
|
|
|
|
for (size_t j = 0; j < num_columns_to_add; ++j)
|
|
|
|
|
added_columns[j]->insertFrom(*current->block->unsafeGetByPosition(j).column.get(), current->row_num);
|
|
|
|
|
|
|
|
|
|
++rows_joined;
|
|
|
|
|
}
|
2014-06-17 17:07:22 +00:00
|
|
|
|
|
2014-06-19 18:15:46 +00:00
|
|
|
|
current_offset += rows_joined;
|
|
|
|
|
(*offsets)[i] = current_offset;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (KIND == ASTJoin::Inner)
|
|
|
|
|
{
|
|
|
|
|
(*offsets)[i] = current_offset;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
++current_offset;
|
|
|
|
|
(*offsets)[i] = current_offset;
|
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < num_columns_to_add; ++j)
|
|
|
|
|
added_columns[j]->insertDefault();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <ASTJoin::Kind KIND, ASTJoin::Strictness STRICTNESS, typename Maps>
|
2015-01-29 03:11:10 +00:00
|
|
|
|
void Join::joinBlockImpl(Block & block, const Maps & maps) const
|
2014-06-12 04:04:47 +00:00
|
|
|
|
{
|
2014-07-02 20:23:48 +00:00
|
|
|
|
size_t keys_size = key_names_left.size();
|
2014-06-12 04:04:47 +00:00
|
|
|
|
ConstColumnPlainPtrs key_columns(keys_size);
|
|
|
|
|
|
2015-06-09 20:55:15 +00:00
|
|
|
|
/// Редкий случай, когда ключи являются константами. Чтобы не поддерживать отдельный код, материализуем их.
|
|
|
|
|
Columns materialized_columns;
|
|
|
|
|
|
2014-06-12 04:04:47 +00:00
|
|
|
|
/// Запоминаем столбцы ключей, с которыми будем работать
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
2015-06-09 20:55:15 +00:00
|
|
|
|
{
|
2015-01-29 03:07:58 +00:00
|
|
|
|
key_columns[i] = block.getByName(key_names_left[i]).column;
|
2014-06-12 04:04:47 +00:00
|
|
|
|
|
2015-06-09 20:55:15 +00:00
|
|
|
|
if (key_columns[i]->isConst())
|
|
|
|
|
{
|
|
|
|
|
materialized_columns.emplace_back(dynamic_cast<const IColumnConst &>(*key_columns[i]).convertToFullColumn());
|
|
|
|
|
key_columns[i] = materialized_columns.back();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-12 04:04:47 +00:00
|
|
|
|
/// Добавляем в блок новые столбцы.
|
2015-06-09 23:50:22 +00:00
|
|
|
|
size_t num_columns_to_add = sample_block_with_columns_to_add.columns();
|
2014-06-12 04:04:47 +00:00
|
|
|
|
ColumnPlainPtrs added_columns(num_columns_to_add);
|
|
|
|
|
|
2014-06-18 18:31:35 +00:00
|
|
|
|
size_t existing_columns = block.columns();
|
|
|
|
|
|
2014-06-12 04:04:47 +00:00
|
|
|
|
for (size_t i = 0; i < num_columns_to_add; ++i)
|
|
|
|
|
{
|
2015-07-17 01:27:35 +00:00
|
|
|
|
const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.getByPosition(i);
|
|
|
|
|
ColumnWithTypeAndName new_column = src_column.cloneEmpty();
|
2014-06-12 04:04:47 +00:00
|
|
|
|
block.insert(new_column);
|
|
|
|
|
added_columns[i] = new_column.column;
|
|
|
|
|
added_columns[i]->reserve(src_column.column->size());
|
|
|
|
|
}
|
|
|
|
|
|
2014-06-13 02:05:05 +00:00
|
|
|
|
size_t rows = block.rowsInFirstColumn();
|
2014-06-12 04:04:47 +00:00
|
|
|
|
|
2014-06-18 18:31:35 +00:00
|
|
|
|
/// Используется при ANY INNER JOIN
|
|
|
|
|
std::unique_ptr<IColumn::Filter> filter;
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
if ((kind == ASTJoin::Inner || kind == ASTJoin::Right) && strictness == ASTJoin::Any)
|
2014-06-18 18:31:35 +00:00
|
|
|
|
filter.reset(new IColumn::Filter(rows));
|
|
|
|
|
|
2014-06-19 18:15:46 +00:00
|
|
|
|
/// Используется при ALL ... JOIN
|
|
|
|
|
IColumn::Offset_t current_offset = 0;
|
|
|
|
|
std::unique_ptr<IColumn::Offsets_t> offsets_to_replicate;
|
|
|
|
|
|
|
|
|
|
if (strictness == ASTJoin::All)
|
|
|
|
|
offsets_to_replicate.reset(new IColumn::Offsets_t(rows));
|
|
|
|
|
|
2015-03-02 01:10:58 +00:00
|
|
|
|
if (type == Type::KEY_64)
|
2014-06-12 04:04:47 +00:00
|
|
|
|
{
|
2014-06-19 18:15:46 +00:00
|
|
|
|
typedef typename Maps::MapUInt64 Map;
|
|
|
|
|
const Map & map = *maps.key64;
|
2014-06-12 04:04:47 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
UInt64 key = column.get64(i);
|
2014-06-19 18:15:46 +00:00
|
|
|
|
Adder<KIND, STRICTNESS, Map>::add(map, key, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get());
|
2014-06-17 17:07:22 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2015-03-02 01:10:58 +00:00
|
|
|
|
else if (type == Type::KEY_STRING)
|
2014-06-17 17:07:22 +00:00
|
|
|
|
{
|
2014-06-19 18:15:46 +00:00
|
|
|
|
typedef typename Maps::MapString Map;
|
|
|
|
|
const Map & map = *maps.key_string;
|
2014-06-17 17:07:22 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
2014-06-12 04:04:47 +00:00
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (const ColumnString * column_string = typeid_cast<const ColumnString *>(&column))
|
2014-06-17 17:07:22 +00:00
|
|
|
|
{
|
|
|
|
|
const ColumnString::Offsets_t & offsets = column_string->getOffsets();
|
|
|
|
|
const ColumnString::Chars_t & data = column_string->getChars();
|
2014-06-12 04:04:47 +00:00
|
|
|
|
|
2014-06-17 17:07:22 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2014-06-12 04:04:47 +00:00
|
|
|
|
{
|
2014-06-17 17:07:22 +00:00
|
|
|
|
/// Строим ключ
|
|
|
|
|
StringRef key(&data[i == 0 ? 0 : offsets[i - 1]], (i == 0 ? offsets[i] : (offsets[i] - offsets[i - 1])) - 1);
|
2014-06-19 18:15:46 +00:00
|
|
|
|
Adder<KIND, STRICTNESS, Map>::add(map, key, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get());
|
2014-06-12 04:04:47 +00:00
|
|
|
|
}
|
2014-06-17 17:07:22 +00:00
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
else if (const ColumnFixedString * column_string = typeid_cast<const ColumnFixedString *>(&column))
|
2014-06-17 17:07:22 +00:00
|
|
|
|
{
|
|
|
|
|
size_t n = column_string->getN();
|
|
|
|
|
const ColumnFixedString::Chars_t & data = column_string->getChars();
|
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2014-06-12 04:04:47 +00:00
|
|
|
|
{
|
2014-06-17 17:07:22 +00:00
|
|
|
|
/// Строим ключ
|
|
|
|
|
StringRef key(&data[i * n], n);
|
2014-06-19 18:15:46 +00:00
|
|
|
|
Adder<KIND, STRICTNESS, Map>::add(map, key, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get());
|
2014-06-12 04:04:47 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-17 17:07:22 +00:00
|
|
|
|
else
|
|
|
|
|
throw Exception("Illegal type of column when creating set with string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
}
|
2015-03-02 01:10:58 +00:00
|
|
|
|
else if (type == Type::HASHED)
|
2014-06-17 17:07:22 +00:00
|
|
|
|
{
|
2014-06-19 18:15:46 +00:00
|
|
|
|
typedef typename Maps::MapHashed Map;
|
|
|
|
|
Map & map = *maps.hashed;
|
2014-06-17 17:07:22 +00:00
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
|
|
|
|
UInt128 key = keys_fit_128_bits
|
2015-02-22 05:53:16 +00:00
|
|
|
|
? packFixed<UInt128>(i, keys_size, key_columns, key_sizes)
|
2014-06-17 17:07:22 +00:00
|
|
|
|
: hash128(i, keys_size, key_columns);
|
|
|
|
|
|
2014-06-19 18:15:46 +00:00
|
|
|
|
Adder<KIND, STRICTNESS, Map>::add(map, key, num_columns_to_add, added_columns, i, filter.get(), current_offset, offsets_to_replicate.get());
|
2014-06-17 17:07:22 +00:00
|
|
|
|
}
|
2014-06-12 04:04:47 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Unknown JOIN variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
2014-06-18 18:31:35 +00:00
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
/// Если ANY INNER|RIGHT JOIN - фильтруем все столбцы кроме новых.
|
|
|
|
|
if (filter)
|
2014-06-18 18:31:35 +00:00
|
|
|
|
for (size_t i = 0; i < existing_columns; ++i)
|
|
|
|
|
block.getByPosition(i).column = block.getByPosition(i).column->filter(*filter);
|
2014-06-19 18:15:46 +00:00
|
|
|
|
|
|
|
|
|
/// Если ALL ... JOIN - размножаем все столбцы кроме новых.
|
2015-04-17 08:46:06 +00:00
|
|
|
|
if (offsets_to_replicate)
|
2014-06-19 18:15:46 +00:00
|
|
|
|
for (size_t i = 0; i < existing_columns; ++i)
|
|
|
|
|
block.getByPosition(i).column = block.getByPosition(i).column->replicate(*offsets_to_replicate);
|
2014-06-18 18:31:35 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-06-09 21:34:45 +00:00
|
|
|
|
void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) const
|
|
|
|
|
{
|
|
|
|
|
size_t keys_size = key_names_left.size();
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
if (block_left.getByName(key_names_left[i]).type->getName() != block_right.getByName(key_names_right[i]).type->getName())
|
|
|
|
|
throw Exception("Type mismatch of columns to JOIN by: "
|
|
|
|
|
+ key_names_left[i] + " " + block_left.getByName(key_names_left[i]).type->getName() + " at left, "
|
2015-06-09 23:50:22 +00:00
|
|
|
|
+ key_names_right[i] + " " + block_right.getByName(key_names_right[i]).type->getName() + " at right",
|
2015-06-09 21:34:45 +00:00
|
|
|
|
ErrorCodes::TYPE_MISMATCH);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2015-01-29 03:07:58 +00:00
|
|
|
|
/** Joins the given block in place, dispatching to the joinBlockImpl instantiation
  * that matches this Join's kind and strictness.
  * Holds a read lock on rwlock for the whole call and checks the key types first.
  * FULL is handled by the LEFT implementation and RIGHT by the INNER one, both over the "*_full" maps.
  * A kind/strictness combination not listed below leaves the block untouched.
  */
void Join::joinBlock(Block & block) const
{
    Poco::ScopedReadRWLock lock(rwlock);

    checkTypesOfKeys(block, sample_block_with_keys);

    if (strictness == ASTJoin::Any)
    {
        switch (kind)
        {
            case ASTJoin::Left:
                joinBlockImpl<ASTJoin::Left, ASTJoin::Any>(block, maps_any);
                break;
            case ASTJoin::Inner:
                joinBlockImpl<ASTJoin::Inner, ASTJoin::Any>(block, maps_any);
                break;
            case ASTJoin::Full:
                joinBlockImpl<ASTJoin::Left, ASTJoin::Any>(block, maps_any_full);
                break;
            case ASTJoin::Right:
                joinBlockImpl<ASTJoin::Inner, ASTJoin::Any>(block, maps_any_full);
                break;
            default:
                break;
        }
    }
    else if (strictness == ASTJoin::All)
    {
        switch (kind)
        {
            case ASTJoin::Left:
                joinBlockImpl<ASTJoin::Left, ASTJoin::All>(block, maps_all);
                break;
            case ASTJoin::Inner:
                joinBlockImpl<ASTJoin::Inner, ASTJoin::All>(block, maps_all);
                break;
            case ASTJoin::Full:
                joinBlockImpl<ASTJoin::Left, ASTJoin::All>(block, maps_all_full);
                break;
            case ASTJoin::Right:
                joinBlockImpl<ASTJoin::Inner, ASTJoin::All>(block, maps_all_full);
                break;
            default:
                break;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
2015-04-16 11:40:18 +00:00
|
|
|
|
/** Appends the non-key columns of the right-side totals to the given block.
  * If no totals were stored, appends instead a single row of default values
  * for every column of sample_block_with_columns_to_add.
  */
void Join::joinTotals(Block & block) const
{
    Block columns_to_attach = totals;

    if (columns_to_attach)
    {
        /// Key columns are not attached: drop them from the local copy.
        for (const auto & key_name : key_names_right)
            columns_to_attach.erase(columns_to_attach.getPositionByName(key_name));
    }
    else
    {
        /// No totals available: attach empty totals consisting of one row of default values.
        columns_to_attach = sample_block_with_columns_to_add.cloneEmpty();

        const size_t num_defaults = columns_to_attach.columns();
        for (size_t pos = 0; pos != num_defaults; ++pos)
            columns_to_attach.getByPosition(pos).column->insertDefault();
    }

    const size_t num_to_attach = columns_to_attach.columns();
    for (size_t pos = 0; pos != num_to_attach; ++pos)
        block.insert(columns_to_attach.getByPosition(pos));
}
|
|
|
|
|
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
/** Helper that appends the contents of one hash-map entry to the output columns:
  * default values go into the left-side columns, the stored right-side row(s) go into the right-side columns.
  * Only declared here; the behaviour is provided by partial specializations per STRICTNESS.
  */
template <ASTJoin::Strictness STRICTNESS, typename Mapped>
struct AdderNonJoined;
|
|
|
|
|
|
|
|
|
|
template <typename Mapped>
|
|
|
|
|
struct AdderNonJoined<ASTJoin::Any, Mapped>
|
|
|
|
|
{
|
|
|
|
|
static void add(const Mapped & mapped,
|
|
|
|
|
size_t num_columns_left, ColumnPlainPtrs & columns_left,
|
|
|
|
|
size_t num_columns_right, ColumnPlainPtrs & columns_right)
|
|
|
|
|
{
|
|
|
|
|
for (size_t j = 0; j < num_columns_left; ++j)
|
|
|
|
|
columns_left[j]->insertDefault();
|
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < num_columns_right; ++j)
|
|
|
|
|
columns_right[j]->insertFrom(*mapped.block->unsafeGetByPosition(j).column.get(), mapped.row_num);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename Mapped>
|
|
|
|
|
struct AdderNonJoined<ASTJoin::All, Mapped>
|
|
|
|
|
{
|
|
|
|
|
static void add(const Mapped & mapped,
|
|
|
|
|
size_t num_columns_left, ColumnPlainPtrs & columns_left,
|
|
|
|
|
size_t num_columns_right, ColumnPlainPtrs & columns_right)
|
|
|
|
|
{
|
|
|
|
|
for (auto current = &static_cast<const typename Mapped::Base_t &>(mapped); current != nullptr; current = current->next)
|
|
|
|
|
{
|
|
|
|
|
for (size_t j = 0; j < num_columns_left; ++j)
|
|
|
|
|
columns_left[j]->insertDefault();
|
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < num_columns_right; ++j)
|
|
|
|
|
columns_right[j]->insertFrom(*current->block->unsafeGetByPosition(j).column.get(), current->row_num);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Поток из неприсоединённых ранее строк правой таблицы.
|
|
|
|
|
class NonJoinedBlockInputStream : public IProfilingBlockInputStream
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
NonJoinedBlockInputStream(const Join & parent_, Block & left_sample_block_, size_t max_block_size_)
|
|
|
|
|
: parent(parent_), left_sample_block(left_sample_block_), max_block_size(max_block_size_)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
2015-06-08 20:22:02 +00:00
|
|
|
|
String getName() const override { return "NonJoined"; }
|
2015-04-17 08:46:06 +00:00
|
|
|
|
|
|
|
|
|
String getID() const override
|
|
|
|
|
{
|
|
|
|
|
std::stringstream res;
|
|
|
|
|
res << "NonJoined(" << &parent << ")";
|
|
|
|
|
return res.str();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
Block readImpl() override
|
|
|
|
|
{
|
|
|
|
|
if (parent.blocks.empty())
|
|
|
|
|
return Block();
|
|
|
|
|
|
|
|
|
|
if (parent.strictness == ASTJoin::Any)
|
|
|
|
|
return createBlock<ASTJoin::Any>(parent.maps_any_full);
|
|
|
|
|
else if (parent.strictness == ASTJoin::All)
|
|
|
|
|
return createBlock<ASTJoin::All>(parent.maps_all_full);
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Logical error: unknown JOIN strictness (must be ANY or ALL)", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
const Join & parent;
|
|
|
|
|
Block left_sample_block;
|
|
|
|
|
size_t max_block_size;
|
|
|
|
|
|
|
|
|
|
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <ASTJoin::Strictness STRICTNESS, typename Maps>
|
|
|
|
|
Block createBlock(const Maps & maps)
|
|
|
|
|
{
|
|
|
|
|
Block block = left_sample_block.cloneEmpty();
|
|
|
|
|
|
|
|
|
|
size_t num_columns_left = left_sample_block.columns();
|
|
|
|
|
ColumnPlainPtrs columns_left(num_columns_left);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_columns_left; ++i)
|
2015-04-18 22:33:24 +00:00
|
|
|
|
{
|
|
|
|
|
auto & column_with_name_and_type = block.getByPosition(i);
|
|
|
|
|
column_with_name_and_type.column = column_with_name_and_type.type->createColumn();
|
|
|
|
|
columns_left[i] = column_with_name_and_type.column.get();
|
|
|
|
|
}
|
2015-04-17 08:46:06 +00:00
|
|
|
|
|
|
|
|
|
/// Добавляем в блок новые столбцы.
|
2015-06-09 23:50:22 +00:00
|
|
|
|
size_t num_columns_right = parent.sample_block_with_columns_to_add.columns();
|
2015-04-17 08:46:06 +00:00
|
|
|
|
ColumnPlainPtrs columns_right(num_columns_right);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_columns_right; ++i)
|
|
|
|
|
{
|
2015-07-17 01:27:35 +00:00
|
|
|
|
const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.getByPosition(i);
|
|
|
|
|
ColumnWithTypeAndName new_column = src_column.cloneEmpty();
|
2015-04-17 08:46:06 +00:00
|
|
|
|
block.insert(new_column);
|
|
|
|
|
columns_right[i] = new_column.column;
|
|
|
|
|
columns_right[i]->reserve(src_column.column->size());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t rows_added = 0;
|
|
|
|
|
if (parent.type == Join::Type::KEY_64)
|
|
|
|
|
rows_added = fillColumns<STRICTNESS>(*maps.key64, num_columns_left, columns_left, num_columns_right, columns_right);
|
|
|
|
|
else if (parent.type == Join::Type::KEY_STRING)
|
|
|
|
|
rows_added = fillColumns<STRICTNESS>(*maps.key_string, num_columns_left, columns_left, num_columns_right, columns_right);
|
|
|
|
|
else if (parent.type == Join::Type::HASHED)
|
|
|
|
|
rows_added = fillColumns<STRICTNESS>(*maps.hashed, num_columns_left, columns_left, num_columns_right, columns_right);
|
|
|
|
|
else
|
|
|
|
|
throw Exception("Unknown JOIN variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
|
|
|
|
|
2015-04-18 22:33:24 +00:00
|
|
|
|
std::cerr << "rows added: " << rows_added << "\n";
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
if (!rows_added)
|
|
|
|
|
return Block();
|
|
|
|
|
|
2015-04-18 22:33:24 +00:00
|
|
|
|
std::cerr << block.dumpStructure() << "\n";
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
return block;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <ASTJoin::Strictness STRICTNESS, typename Map>
|
|
|
|
|
size_t fillColumns(const Map & map,
|
|
|
|
|
size_t num_columns_left, ColumnPlainPtrs & columns_left,
|
|
|
|
|
size_t num_columns_right, ColumnPlainPtrs & columns_right)
|
|
|
|
|
{
|
|
|
|
|
size_t rows_added = 0;
|
|
|
|
|
|
|
|
|
|
if (!position)
|
|
|
|
|
position = decltype(position)(
|
|
|
|
|
static_cast<void *>(new typename Map::const_iterator(map.begin())),
|
|
|
|
|
[](void * ptr) { delete reinterpret_cast<typename Map::const_iterator *>(ptr); });
|
|
|
|
|
|
|
|
|
|
auto & it = *reinterpret_cast<typename Map::const_iterator *>(position.get());
|
|
|
|
|
auto end = map.end();
|
|
|
|
|
|
|
|
|
|
for (; it != end; ++it)
|
|
|
|
|
{
|
2015-04-18 22:33:24 +00:00
|
|
|
|
std::cerr << it->second.getUsed() << "\n";
|
|
|
|
|
|
2015-04-17 08:46:06 +00:00
|
|
|
|
if (it->second.getUsed())
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->second, num_columns_left, columns_left, num_columns_right, columns_right);
|
|
|
|
|
|
|
|
|
|
++rows_added;
|
|
|
|
|
if (rows_added == max_block_size)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return rows_added;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Creates a stream that reads from this Join the right-table rows which were not joined,
/// producing blocks of at most max_block_size map entries laid out after left_sample_block's columns.
BlockInputStreamPtr Join::createStreamWithNonJoinedRows(Block & left_sample_block, size_t max_block_size) const
{
    BlockInputStreamPtr non_joined_stream = new NonJoinedBlockInputStream(*this, left_sample_block, max_block_size);
    return non_joined_stream;
}
|
|
|
|
|
|
2015-04-16 11:40:18 +00:00
|
|
|
|
|
2014-06-12 02:31:30 +00:00
|
|
|
|
}
|