2014-01-28 16:45:10 +00:00
|
|
|
#pragma once
|
|
|
|
|
2019-03-11 14:01:45 +00:00
|
|
|
#include <Parsers/IAST_fwd.h>
|
2019-05-17 14:34:25 +00:00
|
|
|
#include <Storages/IStorage_fwd.h>
|
2020-10-14 12:19:29 +00:00
|
|
|
#include <Storages/ColumnsDescription.h>
|
2022-03-17 13:14:57 +00:00
|
|
|
#include <Access/Common/AccessType.h>
|
2023-04-28 10:10:42 +00:00
|
|
|
#include <Common/FunctionDocumentation.h>
|
2023-01-26 10:52:40 +00:00
|
|
|
#include <Analyzer/IQueryTreeNode.h>
|
2019-03-11 14:01:45 +00:00
|
|
|
|
2017-01-21 04:24:28 +00:00
|
|
|
#include <memory>
|
2019-05-17 14:34:25 +00:00
|
|
|
#include <string>
|
2014-01-28 16:45:10 +00:00
|
|
|
|
2019-03-11 14:01:45 +00:00
|
|
|
|
2014-01-28 16:45:10 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-01-21 04:24:28 +00:00
|
|
|
/// Forward declaration only: nothing visible in this header needs the complete type.
/// NOTE(review): the declarations below use ContextPtr, which is declared elsewhere — confirm this line is still required.
class Context;
|
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/** Interface for table functions.
|
2014-01-28 16:45:10 +00:00
|
|
|
*
|
2017-04-16 15:00:33 +00:00
|
|
|
* Table functions are not relevant to other functions.
|
|
|
|
* The table function can be specified in the FROM section instead of the [db.]Table
|
|
|
|
* The table function returns a temporary StoragePtr object that is used to execute the query.
|
2014-01-28 16:45:10 +00:00
|
|
|
*
|
2017-04-16 15:00:33 +00:00
|
|
|
* Example:
|
2014-01-28 16:45:10 +00:00
|
|
|
* SELECT count() FROM remote('example01-01-1', merge, hits)
|
2017-04-16 15:00:33 +00:00
|
|
|
* - go to `example01-01-1`, in `merge` database, `hits` table.
|
2020-10-14 12:19:29 +00:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* When creating table AS table_function(...) we probably don't know structure of the table
|
|
|
|
* and have to request if from remote server, because structure is required to create a Storage.
|
|
|
|
* To avoid failures on server startup, we write obtained structure to metadata file.
|
|
|
|
* So, table function may have two different columns lists:
|
|
|
|
* - cached_columns written to metadata
|
|
|
|
* - the list returned from getActualTableStructure(...)
|
|
|
|
* See StorageTableFunctionProxy.
|
2014-01-28 16:45:10 +00:00
|
|
|
*/
|
|
|
|
|
2020-10-14 12:19:29 +00:00
|
|
|
class ITableFunction : public std::enable_shared_from_this<ITableFunction>
|
2014-01-28 16:45:10 +00:00
|
|
|
{
|
|
|
|
public:
|
2019-07-09 15:40:21 +00:00
|
|
|
static inline std::string getDatabaseName() { return "_table_function"; }
|
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
/// Get the main function name.
|
2014-01-28 16:45:10 +00:00
|
|
|
virtual std::string getName() const = 0;
|
|
|
|
|
2020-10-14 12:19:29 +00:00
|
|
|
/// Returns true if we always know table structure when executing table function
|
|
|
|
/// (e.g. structure is specified in table function arguments)
|
|
|
|
virtual bool hasStaticStructure() const { return false; }
|
|
|
|
/// Returns false if storage returned by table function supports type conversion (e.g. StorageDistributed)
|
|
|
|
virtual bool needStructureConversion() const { return true; }
|
|
|
|
|
2023-01-26 10:52:40 +00:00
|
|
|
/** Return array of table function arguments indexes for which query tree analysis must be skipped.
|
|
|
|
* It is important for table functions that take subqueries, because otherwise analyzer will resolve them.
|
|
|
|
*/
|
|
|
|
virtual std::vector<size_t> skipAnalysisForArguments(const QueryTreeNodePtr & /*query_node_table_function*/, ContextPtr /*context*/) const { return {}; }
|
|
|
|
|
2021-04-10 23:33:54 +00:00
|
|
|
virtual void parseArguments(const ASTPtr & /*ast_function*/, ContextPtr /*context*/) {}
|
2020-10-14 12:19:29 +00:00
|
|
|
|
|
|
|
/// Returns actual table structure probably requested from remote server, may fail
|
2023-07-06 08:56:07 +00:00
|
|
|
virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/, bool is_insert_query) const = 0;
|
2020-10-14 12:19:29 +00:00
|
|
|
|
2022-02-18 16:19:42 +00:00
|
|
|
/// Check if table function needs a structure hint from SELECT query in case of
|
2022-10-14 15:09:35 +00:00
|
|
|
/// INSERT INTO FUNCTION ... SELECT ... and INSERT INTO ... SELECT ... FROM table_function(...)
|
2022-02-18 16:19:42 +00:00
|
|
|
/// It's used for schema inference.
|
|
|
|
virtual bool needStructureHint() const { return false; }
|
|
|
|
|
|
|
|
/// Set a structure hint from SELECT query in case of
|
2022-10-14 15:09:35 +00:00
|
|
|
/// INSERT INTO FUNCTION ... SELECT ... and INSERT INTO ... SELECT ... FROM table_function(...)
|
2022-02-18 16:19:42 +00:00
|
|
|
/// This hint could be used not to repeat schema in function arguments.
|
|
|
|
virtual void setStructureHint(const ColumnsDescription &) {}
|
|
|
|
|
2022-11-25 19:33:47 +00:00
|
|
|
/// Used for table functions that can use structure hint during INSERT INTO ... SELECT ... FROM table_function(...)
|
|
|
|
/// It returns possible virtual column names of corresponding storage. If select query contains
|
|
|
|
/// one of these columns, the structure from insertion table won't be used as a structure hint,
|
|
|
|
/// because we cannot determine which column from table correspond to this virtual column.
|
|
|
|
virtual std::unordered_set<String> getVirtualsToCheckBeforeUsingStructureHint() const { return {}; }
|
|
|
|
|
2023-09-11 14:55:37 +00:00
|
|
|
virtual bool supportsReadingSubsetOfColumns(const ContextPtr &) { return true; }
|
2022-10-14 15:09:35 +00:00
|
|
|
|
2018-03-02 05:03:28 +00:00
|
|
|
/// Create storage according to the query.
|
2021-04-10 23:33:54 +00:00
|
|
|
StoragePtr
|
2023-01-03 14:25:04 +00:00
|
|
|
execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns_ = {}, bool use_global_context = false, bool is_insert = false) const;
|
2014-02-23 00:37:25 +00:00
|
|
|
|
2021-03-19 12:47:27 +00:00
|
|
|
virtual ~ITableFunction() = default;
|
2018-03-02 05:03:28 +00:00
|
|
|
|
2022-09-04 16:58:39 +00:00
|
|
|
protected:
|
|
|
|
virtual AccessType getSourceAccessType() const;
|
|
|
|
|
2018-03-02 05:03:28 +00:00
|
|
|
private:
|
2021-04-10 23:33:54 +00:00
|
|
|
virtual StoragePtr executeImpl(
|
2023-07-06 08:56:07 +00:00
|
|
|
const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const = 0;
|
2022-03-17 13:14:57 +00:00
|
|
|
|
2020-04-06 05:19:40 +00:00
|
|
|
virtual const char * getStorageTypeName() const = 0;
|
2014-01-28 16:45:10 +00:00
|
|
|
};
|
|
|
|
|
2022-10-26 16:45:23 +00:00
|
|
|
/// Properties of table function that are independent of argument types and parameters.
|
|
|
|
struct TableFunctionProperties
|
|
|
|
{
|
2023-04-28 10:10:42 +00:00
|
|
|
FunctionDocumentation documentation;
|
2022-11-20 06:47:20 +00:00
|
|
|
|
|
|
|
/** It is determined by the possibility of modifying any data or making requests to arbitrary hostnames.
|
|
|
|
*
|
|
|
|
* If users can make a request to an arbitrary hostname, they can get the info from the internal network
|
|
|
|
* or manipulate internal APIs (say - put some data into Memcached, which is available only in the corporate network).
|
|
|
|
* This is named "SSRF attack".
|
|
|
|
* Or a user can use an open ClickHouse server to amplify DoS attacks.
|
|
|
|
*
|
|
|
|
* In those cases, the table function should not be allowed in readonly mode.
|
|
|
|
*/
|
2022-10-26 16:45:23 +00:00
|
|
|
bool allow_readonly = false;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2016-05-28 10:15:36 +00:00
|
|
|
/// Shared-ownership pointer to a table function object.
using TableFunctionPtr = std::shared_ptr<ITableFunction>;
|
2014-01-28 16:45:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
}
|