#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { class TableJoin; class DictionaryReader; namespace JoinStuff { /// Flags needed to implement RIGHT and FULL JOINs. class JoinUsedFlags { std::vector flags; bool need_flags; public: /// Update size for vector with flags. /// Calling this method invalidates existing flags. /// It can be called several times, but all of them should happen before using this structure. template void reinit(size_t size_); bool getUsedSafe(size_t i) const; template void setUsed(size_t i); template bool getUsed(size_t i); template bool setUsedOnce(size_t i); }; } /** Data structure for implementation of JOIN. * It is just a hash table: keys -> rows of joined ("right") table. * Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys. * * JOIN-s could be of these types: * - ALL × LEFT/INNER/RIGHT/FULL * - ANY × LEFT/INNER/RIGHT * - SEMI/ANTI x LEFT/RIGHT * - ASOF x LEFT/INNER * - CROSS * * ALL means usual JOIN, when rows are multiplied by number of matching rows from the "right" table. * ANY uses one line per unique key from right table. For LEFT JOIN it would be any row (with needed joined key) from the right table, * for RIGHT JOIN it would be any row from the left table and for INNER one it would be any row from right and any row from left. * SEMI JOIN filter left table by keys that are present in right table for LEFT JOIN, and filter right table by keys from left table * for RIGHT JOIN. In other words SEMI JOIN returns only rows which joining keys present in another table. * ANTI JOIN is the same as SEMI JOIN but returns rows with joining keys that are NOT present in another table. * SEMI/ANTI JOINs allow to get values from both tables. For filter table it gets any row with joining same key. For ANTI JOIN it returns * defaults other table columns. * ASOF JOIN is not-equi join. For one key column it finds nearest value to join according to join inequality. * It's expected that ANY|SEMI LEFT JOIN is more efficient that ALL one. * * If INNER is specified - leave only rows that have matching rows from "right" table. * If LEFT is specified - in case when there is no matching row in "right" table, fill it with default values instead. * If RIGHT is specified - first process as INNER, but track what rows from the right table was joined, * and at the end, add rows from right table that was not joined and substitute default values for columns of left table. * If FULL is specified - first process as LEFT, but track what rows from the right table was joined, * and at the end, add rows from right table that was not joined and substitute default values for columns of left table. * * Thus, LEFT and RIGHT JOINs are not symmetric in terms of implementation. * * All JOINs (except CROSS) are done by equality condition on keys (equijoin). * Non-equality and other conditions are not supported. * * Implementation: * * 1. Build hash table in memory from "right" table. * This hash table is in form of keys -> row in case of ANY or keys -> [rows...] in case of ALL. * This is done in insertFromBlock method. * * 2. Process "left" table and join corresponding rows from "right" table by lookups in the map. * This is done in joinBlock methods. * * In case of ANY LEFT JOIN - form new columns with found values or default values. * This is the most simple. Number of rows in left table does not change. * * In case of ANY INNER JOIN - form new columns with found values, * and also build a filter - in what rows nothing was found. * Then filter columns of "left" table. * * In case of ALL ... JOIN - form new columns with all found rows, * and also fill 'offsets' array, describing how many times we need to replicate values of "left" table. * Then replicate columns of "left" table. * * How Nullable keys are processed: * * NULLs never join to anything, even to each other. * During building of map, we just skip keys with NULL value of any component. * During joining, we simply treat rows with any NULLs in key as non joined. * * Default values for outer joins (LEFT, RIGHT, FULL): * * Behaviour is controlled by 'join_use_nulls' settings. * If it is false, we substitute (global) default value for the data type, for non-joined rows * (zero, empty string, etc. and NULL for Nullable data types). * If it is true, we always generate Nullable column and substitute NULLs for non-joined rows, * as in standard SQL. */ class HashJoin : public IJoin { public: HashJoin(std::shared_ptr table_join_, const Block & right_sample_block, bool any_take_last_row_ = false); const TableJoin & getTableJoin() const override { return *table_join; } /** Add block of data from right hand of JOIN to the map. * Returns false, if some limit was exceeded and you should not insert more data. */ bool addJoinedBlock(const Block & block, bool check_limits) override; /** Join data from the map (that was previously built by calls to addJoinedBlock) to the block with data from "left" table. * Could be called from different threads in parallel. */ void joinBlock(Block & block, ExtraBlockPtr & not_processed) override; /// Check joinGet arguments and infer the return type. DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const; /// Used by joinGet function that turns StorageJoin into a dictionary. ColumnWithTypeAndName joinGet(const Block & block, const Block & block_with_columns_to_add) const; /** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later. */ void setTotals(const Block & block) override { totals = block; } const Block & getTotals() const override { return totals; } bool isFilled() const override { return from_storage_join || data->type == Type::DICT; } /** For RIGHT and FULL JOINs. * A stream that will contain default values from left table, joined with rows from right table, that was not joined before. * Use only after all calls to joinBlock was done. * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ std::shared_ptr getNonJoinedBlocks(const Block & result_sample_block, UInt64 max_block_size) const override; /// Number of keys in all built JOIN maps. size_t getTotalRowCount() const final; /// Sum size in bytes of all buffers, used for JOIN maps and for all memory pools. size_t getTotalByteCount() const final; bool alwaysReturnsEmptySet() const final; ASTTableJoin::Kind getKind() const { return kind; } ASTTableJoin::Strictness getStrictness() const { return strictness; } const std::optional & getAsofType() const { return asof_type; } ASOF::Inequality getAsofInequality() const { return asof_inequality; } bool anyTakeLastRow() const { return any_take_last_row; } const ColumnWithTypeAndName & rightAsofKeyColumn() const { /// It should be nullable if nullable_right_side is true return savedBlockSample().getByName(key_names_right.back()); } /// Different types of keys for maps. #define APPLY_FOR_JOIN_VARIANTS(M) \ M(key8) \ M(key16) \ M(key32) \ M(key64) \ M(key_string) \ M(key_fixed_string) \ M(keys128) \ M(keys256) \ M(hashed) /// Used for reading from StorageJoin and applying joinGet function #define APPLY_FOR_JOIN_VARIANTS_LIMITED(M) \ M(key8) \ M(key16) \ M(key32) \ M(key64) \ M(key_string) \ M(key_fixed_string) enum class Type { EMPTY, CROSS, DICT, #define M(NAME) NAME, APPLY_FOR_JOIN_VARIANTS(M) #undef M }; /** Different data structures, that are used to perform JOIN. */ template struct MapsTemplate { std::unique_ptr> key8; std::unique_ptr> key16; std::unique_ptr>> key32; std::unique_ptr>> key64; std::unique_ptr> key_string; std::unique_ptr> key_fixed_string; std::unique_ptr> keys128; std::unique_ptr> keys256; std::unique_ptr> hashed; void create(Type which) { switch (which) { case Type::EMPTY: break; case Type::CROSS: break; case Type::DICT: break; #define M(NAME) \ case Type::NAME: NAME = std::make_unique(); break; APPLY_FOR_JOIN_VARIANTS(M) #undef M } } size_t getTotalRowCount(Type which) const { switch (which) { case Type::EMPTY: return 0; case Type::CROSS: return 0; case Type::DICT: return 0; #define M(NAME) \ case Type::NAME: return NAME ? NAME->size() : 0; APPLY_FOR_JOIN_VARIANTS(M) #undef M } __builtin_unreachable(); } size_t getTotalByteCountImpl(Type which) const { switch (which) { case Type::EMPTY: return 0; case Type::CROSS: return 0; case Type::DICT: return 0; #define M(NAME) \ case Type::NAME: return NAME ? NAME->getBufferSizeInBytes() : 0; APPLY_FOR_JOIN_VARIANTS(M) #undef M } __builtin_unreachable(); } size_t getBufferSizeInCells(Type which) const { switch (which) { case Type::EMPTY: return 0; case Type::CROSS: return 0; case Type::DICT: return 0; #define M(NAME) \ case Type::NAME: return NAME ? NAME->getBufferSizeInCells() : 0; APPLY_FOR_JOIN_VARIANTS(M) #undef M } __builtin_unreachable(); } }; using MapsOne = MapsTemplate; using MapsAll = MapsTemplate; using MapsAsof = MapsTemplate; using MapsVariant = std::variant; using BlockNullmapList = std::deque>; struct RightTableData { Type type = Type::EMPTY; bool empty = true; MapsVariant maps; Block sample_block; /// Block as it would appear in the BlockList BlocksList blocks; /// Blocks of "right" table. BlockNullmapList blocks_nullmaps; /// Nullmaps for blocks of "right" table (if needed) /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows. Arena pool; }; /// We keep correspondence between used_flags and hash table internal buffer. /// Hash table cannot be modified during HashJoin lifetime and must be protected with lock. void setLock(std::shared_mutex & rwlock) { storage_join_lock = std::shared_lock(rwlock); } void reuseJoinedData(const HashJoin & join); std::shared_ptr getJoinedData() const { return data; } bool isUsed(size_t off) const { return used_flags.getUsedSafe(off); } private: friend class NotJoinedHash; friend class JoinSource; std::shared_ptr table_join; ASTTableJoin::Kind kind; ASTTableJoin::Strictness strictness; /// This join was created from StorageJoin and it is already filled. bool from_storage_join = false; /// Names of key columns in right-side table (in the order they appear in ON/USING clause). @note It could contain duplicates. const Names & key_names_right; bool nullable_right_side; /// In case of LEFT and FULL joins, if use_nulls, convert right-side columns to Nullable. bool nullable_left_side; /// In case of RIGHT and FULL joins, if use_nulls, convert left-side columns to Nullable. bool any_take_last_row; /// Overwrite existing values when encountering the same key again std::optional asof_type; ASOF::Inequality asof_inequality; /// Right table data. StorageJoin shares it between many Join objects. std::shared_ptr data; /// Flags that indicate that particular row already used in join. /// Flag is stored for every record in hash map. /// Number of this flags equals to hashtable buffer size (plus one for zero value). /// Changes in hash table broke correspondence, /// so we must guarantee constantness of hash table during HashJoin lifetime (using method setLock) mutable JoinStuff::JoinUsedFlags used_flags; Sizes key_sizes; /// Block with columns from the right-side table. Block right_sample_block; /// Block with columns from the right-side table except key columns. Block sample_block_with_columns_to_add; /// Block with key columns in the same order they appear in the right-side table (duplicates appear once). Block right_table_keys; /// Block with key columns right-side table keys that are needed in result (would be attached after joined columns). Block required_right_keys; /// Left table column names that are sources for required_right_keys columns std::vector required_right_keys_sources; /// Additional conditions for rows to join from JOIN ON section String condition_mask_column_name_left; String condition_mask_column_name_right; Poco::Logger * log; Block totals; /// Should be set via setLock to protect hash table from modification from StorageJoin /// If set HashJoin instance is not available for modification (addJoinedBlock) std::shared_lock storage_join_lock; void init(Type type_); const Block & savedBlockSample() const { return data->sample_block; } /// Modify (structure) right block to save it in block list Block structureRightBlock(const Block & stored_block) const; void initRightBlockStructure(Block & saved_block_sample); template void joinBlockImpl( Block & block, const Names & key_names_left, const Block & block_with_columns_to_add, const Maps & maps, bool is_join_get = false) const; void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const; static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes); bool empty() const; bool overDictionary() const; }; }