mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Improvement
This commit is contained in:
parent
32bf915610
commit
5d6959173c
@ -16,60 +16,61 @@ namespace DB
|
||||
*/
|
||||
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
std::ostringstream oss_for_escaping;
|
||||
/// Escape only the characters that are not used in glob syntax
|
||||
for (const auto & letter : initial_str_with_globs)
|
||||
{
|
||||
if ((letter == '[') || (letter == ']') || (letter == '|') || (letter == '+') || (letter == '-') || (letter == '(') || (letter == ')'))
|
||||
oss << '\\';
|
||||
oss << letter;
|
||||
oss_for_escaping << '\\';
|
||||
oss_for_escaping << letter;
|
||||
}
|
||||
std::string escaped_with_globs = oss.str();
|
||||
oss.str("");
|
||||
std::string escaped_with_globs = oss_for_escaping.str();
|
||||
|
||||
static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*,
|
||||
re2::StringPiece input(escaped_with_globs);
|
||||
re2::StringPiece matched;
|
||||
std::ostringstream oss_for_replacing;
|
||||
size_t current_index = 0;
|
||||
while (RE2::FindAndConsume(&input, enum_or_range, &matched))
|
||||
{
|
||||
std::string buffer = matched.ToString();
|
||||
oss << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '(';
|
||||
oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '(';
|
||||
|
||||
if (buffer.find(',') == std::string::npos)
|
||||
{
|
||||
size_t range_begin, range_end;
|
||||
char point;
|
||||
std::istringstream iss(buffer);
|
||||
iss >> range_begin >> point >> point >> range_end;
|
||||
oss << range_begin;
|
||||
std::istringstream iss_range(buffer);
|
||||
iss_range >> range_begin >> point >> point >> range_end;
|
||||
oss_for_replacing << range_begin;
|
||||
for (size_t i = range_begin + 1; i <= range_end; ++i)
|
||||
{
|
||||
oss << '|' << i;
|
||||
oss_for_replacing << '|' << i;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::replace(buffer.begin(), buffer.end(), ',', '|');
|
||||
oss << buffer;
|
||||
oss_for_replacing << buffer;
|
||||
}
|
||||
oss << ")";
|
||||
oss_for_replacing << ")";
|
||||
current_index = input.data() - escaped_with_globs.data();
|
||||
}
|
||||
oss << escaped_with_globs.substr(current_index);
|
||||
std::string almost_res = oss.str();
|
||||
oss.str("");
|
||||
oss_for_replacing << escaped_with_globs.substr(current_index);
|
||||
std::string almost_res = oss_for_replacing.str();
|
||||
std::ostringstream oss_final_processing;
|
||||
for (const auto & letter : almost_res)
|
||||
{
|
||||
if ((letter == '?') || (letter == '*'))
|
||||
{
|
||||
oss << "[^/]"; /// '?' is any symbol except '/'
|
||||
oss_final_processing << "[^/]"; /// '?' is any symbol except '/'
|
||||
if (letter == '?')
|
||||
continue;
|
||||
}
|
||||
if ((letter == '.') || (letter == '{') || (letter == '}'))
|
||||
oss << '\\';
|
||||
oss << letter;
|
||||
oss_final_processing << '\\';
|
||||
oss_final_processing << letter;
|
||||
}
|
||||
return oss.str();
|
||||
return oss_final_processing.str();
|
||||
}
|
||||
}
|
||||
|
@ -152,13 +152,16 @@ StorageFile::StorageFile(
|
||||
if (db_dir_path.empty())
|
||||
throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME);
|
||||
|
||||
paths[0] = getTablePath(db_dir_path, table_name, format_name);
|
||||
paths.push_back(getTablePath(db_dir_path, table_name, format_name));
|
||||
is_db_table = true;
|
||||
Poco::File(Poco::Path(paths[0]).parent()).createDirectories();
|
||||
Poco::File(Poco::Path(paths.back()).parent()).createDirectories();
|
||||
}
|
||||
}
|
||||
else /// Will use FD
|
||||
{
|
||||
if (paths.size() != 1)
|
||||
throw Exception("Table '" + table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
|
||||
checkCreationIsAllowed(context_global, db_dir_path, paths[0], table_fd);
|
||||
|
||||
is_db_table = false;
|
||||
@ -266,6 +269,8 @@ public:
|
||||
explicit StorageFileBlockOutputStream(StorageFile & storage_)
|
||||
: storage(storage_), lock(storage.rwlock)
|
||||
{
|
||||
if (storage.paths.size() != 1)
|
||||
throw Exception("Table '" + storage.table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
if (storage.use_table_fd)
|
||||
{
|
||||
/** NOTE: Using real file bound to FD may be misleading:
|
||||
@ -277,7 +282,6 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
if (storage.paths.size() != 1) throw Exception("Table '" + storage.table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
write_buf = std::make_unique<WriteBufferFromFile>(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT);
|
||||
}
|
||||
|
||||
@ -320,6 +324,12 @@ BlockOutputStreamPtr StorageFile::write(
|
||||
return std::make_shared<StorageFileBlockOutputStream>(*this);
|
||||
}
|
||||
|
||||
String StorageFile::getDataPath() const
|
||||
{
|
||||
if (paths.size() != 1)
|
||||
throw Exception("Table '" + table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
return paths[0];
|
||||
}
|
||||
|
||||
void StorageFile::drop()
|
||||
{
|
||||
|
@ -41,7 +41,7 @@ public:
|
||||
|
||||
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;
|
||||
|
||||
String getDataPath() const override { return paths[0]; }
|
||||
String getDataPath() const override;
|
||||
|
||||
protected:
|
||||
friend class StorageFileBlockInputStream;
|
||||
@ -70,7 +70,7 @@ private:
|
||||
|
||||
int table_fd = -1;
|
||||
|
||||
std::vector<std::string> paths{""};
|
||||
std::vector<std::string> paths;
|
||||
|
||||
bool is_db_table = true; /// Table is stored in real database, not user's file
|
||||
bool use_table_fd = false; /// Use table_fd instead of path
|
||||
|
@ -155,15 +155,16 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c
|
||||
const size_t last_slash = full_path.rfind('/');
|
||||
const String file_name = full_path.substr(last_slash);
|
||||
const bool looking_for_directory = next_slash != std::string::npos;
|
||||
const bool is_directory = ls.file_info[i].mKind == 'D';
|
||||
/// The kind of the current file_info entry tells whether this ls result is a file or a directory
|
||||
if ((ls.file_info[i].mKind == 'F') && !looking_for_directory)
|
||||
if (!is_directory && !looking_for_directory)
|
||||
{
|
||||
if (re2::RE2::FullMatch(file_name, matcher))
|
||||
{
|
||||
result.push_back(String(ls.file_info[i].mName));
|
||||
}
|
||||
}
|
||||
else if ((ls.file_info[i].mKind == 'D') && looking_for_directory)
|
||||
else if (is_directory && looking_for_directory)
|
||||
{
|
||||
if (re2::RE2::FullMatch(file_name, matcher))
|
||||
{
|
||||
@ -187,7 +188,6 @@ BlockInputStreams StorageHDFS::read(
|
||||
size_t max_block_size,
|
||||
unsigned /*num_streams*/)
|
||||
{
|
||||
Strings path_parts;
|
||||
const size_t begin_of_path = uri.find('/', uri.find("//") + 2);
|
||||
const String path_from_uri = uri.substr(begin_of_path);
|
||||
const String uri_without_path = uri.substr(0, begin_of_path);
|
||||
|
@ -820,7 +820,7 @@ You can select data from a ClickHouse table and save them into some file in the
|
||||
clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}
|
||||
```
|
||||
|
||||
To exchange data with the Hadoop, you can use `HDFS` table engine.
|
||||
To exchange data with Hadoop, you can use the [`HDFS` table engine](../../operations/table_engines/hdfs.md).
|
||||
|
||||
## Format Schema {#formatschema}
|
||||
|
||||
|
@ -8,7 +8,7 @@ to the [File](file.md) and [URL](url.md) engine.
|
||||
```
|
||||
ENGINE = HDFS(URI, format)
|
||||
```
|
||||
|
||||
The `URI` parameter is the whole file URI in HDFS.
|
||||
The `format` parameter specifies one of the available file formats. To perform
|
||||
`SELECT` queries, the format must be supported for input, and to perform
|
||||
`INSERT` queries -- for output. The available formats are listed in the
|
||||
@ -21,7 +21,13 @@ The `format` parameter specifies one of the available file formats. To perform
|
||||
``` sql
|
||||
CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')
|
||||
```
|
||||
**2.** Query the data:
|
||||
|
||||
**2.** Fill the file:
|
||||
``` sql
|
||||
INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
|
||||
```
|
||||
|
||||
**3.** Query the data:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM hdfs_engine_table LIMIT 2
|
||||
|
@ -45,6 +45,7 @@ Engines of the family:
|
||||
- [MySQL](mysql.md)
|
||||
- [ODBC](odbc.md)
|
||||
- [JDBC](jdbc.md)
|
||||
- [HDFS](hdfs.md)
|
||||
|
||||
### Special engines
|
||||
|
||||
|
@ -752,7 +752,7 @@ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parq
|
||||
clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}
|
||||
```
|
||||
|
||||
Для обмена данными с экосистемой Hadoop можно использовать движки таблиц `HDFS` и `URL`.
|
||||
Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../../operations/table_engines/hdfs.md) и `URL`.
|
||||
|
||||
## Схема формата {#formatschema}
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
ENGINE = HDFS(URI, format)
|
||||
```
|
||||
|
||||
В параметр `URI` нужно передавать полный URI файла в HDFS.
|
||||
Параметр `format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT`, и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../interfaces/formats.md#formats).
|
||||
|
||||
**Пример:**
|
||||
@ -18,7 +19,12 @@ ENGINE = HDFS(URI, format)
|
||||
CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')
|
||||
```
|
||||
|
||||
**2.** Запросим данные:
|
||||
**2.** Заполним файл:
|
||||
``` sql
|
||||
INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
|
||||
```
|
||||
|
||||
**3.** Запросим данные:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM hdfs_engine_table LIMIT 2
|
||||
|
Loading…
Reference in New Issue
Block a user