Improvement

This commit is contained in:
stavrolia 2019-09-04 22:55:56 +03:00
parent 32bf915610
commit 5d6959173c
9 changed files with 56 additions and 32 deletions

View File

@ -16,60 +16,61 @@ namespace DB
*/
/// Builds a regular-expression pattern string from a string containing glob syntax.
/// Works in three passes over the text:
///   1. backslash-escape characters that are not part of the glob syntax;
///   2. rewrite every {a,b,c} or {M..N} group as a parenthesized alternation (a|b|c) / (M|M+1|...|N);
///   3. turn '?' into "[^/]" and '*' into "[^/]*", and escape the remaining '.', '{' and '}'.
/// NOTE(review): the near-identical statement pairs below (`oss` vs. `oss_for_*`) look like the old and new
/// sides of a diff shown together - confirm against the real file before relying on this listing.
std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs)
{
std::ostringstream oss;
std::ostringstream oss_for_escaping;
/// Pass 1: escape only the characters that are not used in glob syntax
for (const auto & letter : initial_str_with_globs)
{
if ((letter == '[') || (letter == ']') || (letter == '|') || (letter == '+') || (letter == '-') || (letter == '(') || (letter == ')'))
oss << '\\';
oss << letter;
oss_for_escaping << '\\';
oss_for_escaping << letter;
}
std::string escaped_with_globs = oss.str();
oss.str("");
std::string escaped_with_globs = oss_for_escaping.str();
/// Pass 2: expand {..} groups. The single capture group holds the text between the braces.
static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without {}*,
re2::StringPiece input(escaped_with_globs);
re2::StringPiece matched;
std::ostringstream oss_for_replacing;
/// Offset in escaped_with_globs of the first character that has not been copied to the output yet
size_t current_index = 0;
while (RE2::FindAndConsume(&input, enum_or_range, &matched))
{
std::string buffer = matched.ToString();
/// Copy the text preceding the group. `matched` starts right after the opening '{',
/// so the extra "- 1" drops that brace from the copied prefix.
oss << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '(';
oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '(';
/// The regexp admits only two forms; the one without a comma is the numeric range M..N
if (buffer.find(',') == std::string::npos)
{
size_t range_begin, range_end;
char point;
/// Read M, consume the two dots one character at a time, then read N
std::istringstream iss(buffer);
iss >> range_begin >> point >> point >> range_end;
oss << range_begin;
std::istringstream iss_range(buffer);
iss_range >> range_begin >> point >> point >> range_end;
oss_for_replacing << range_begin;
/// Emit the remaining numbers of the range as alternatives: M|M+1|...|N
for (size_t i = range_begin + 1; i <= range_end; ++i)
{
oss << '|' << i;
oss_for_replacing << '|' << i;
}
}
else
{
/// Enumeration: the commas separating the expressions become regexp alternation
std::replace(buffer.begin(), buffer.end(), ',', '|');
oss << buffer;
oss_for_replacing << buffer;
}
oss << ")";
oss_for_replacing << ")";
/// FindAndConsume has advanced `input` past the whole match (including the closing '}')
current_index = input.data() - escaped_with_globs.data();
}
/// Copy whatever follows the last group
oss << escaped_with_globs.substr(current_index);
std::string almost_res = oss.str();
oss.str("");
oss_for_replacing << escaped_with_globs.substr(current_index);
std::string almost_res = oss_for_replacing.str();
/// Pass 3: translate the glob wildcards and escape the characters that are still special for the regexp
std::ostringstream oss_final_processing;
for (const auto & letter : almost_res)
{
if ((letter == '?') || (letter == '*'))
{
oss << "[^/]"; /// '?' is any symbol except '/'
oss_final_processing << "[^/]"; /// '?' is any symbol except '/'
/// For '*' we fall through and append the '*' itself, which yields "[^/]*"
if (letter == '?')
continue;
}
if ((letter == '.') || (letter == '{') || (letter == '}'))
oss << '\\';
oss << letter;
oss_final_processing << '\\';
oss_final_processing << letter;
}
return oss.str();
return oss_final_processing.str();
}
}

View File

@ -152,13 +152,16 @@ StorageFile::StorageFile(
if (db_dir_path.empty())
throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME);
paths[0] = getTablePath(db_dir_path, table_name, format_name);
paths.push_back(getTablePath(db_dir_path, table_name, format_name));
is_db_table = true;
Poco::File(Poco::Path(paths[0]).parent()).createDirectories();
Poco::File(Poco::Path(paths.back()).parent()).createDirectories();
}
}
else /// Will use FD
{
if (paths.size() != 1)
throw Exception("Table '" + table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
checkCreationIsAllowed(context_global, db_dir_path, paths[0], table_fd);
is_db_table = false;
@ -266,6 +269,8 @@ public:
explicit StorageFileBlockOutputStream(StorageFile & storage_)
: storage(storage_), lock(storage.rwlock)
{
if (storage.paths.size() != 1)
throw Exception("Table '" + storage.table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
if (storage.use_table_fd)
{
/** NOTE: Using real file bound to FD may be misleading:
@ -277,7 +282,6 @@ public:
}
else
{
if (storage.paths.size() != 1) throw Exception("Table '" + storage.table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
write_buf = std::make_unique<WriteBufferFromFile>(storage.paths[0], DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT);
}
@ -320,6 +324,12 @@ BlockOutputStreamPtr StorageFile::write(
return std::make_shared<StorageFileBlockOutputStream>(*this);
}
/// Returns the path of the table's data file.
/// Only defined when the table has exactly one path; otherwise throws DATABASE_ACCESS_DENIED.
String StorageFile::getDataPath() const
{
    if (paths.size() == 1)
        return paths.front();

    throw Exception("Table '" + table_name + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED);
}
void StorageFile::drop()
{

View File

@ -41,7 +41,7 @@ public:
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;
String getDataPath() const override { return paths[0]; }
String getDataPath() const override;
protected:
friend class StorageFileBlockInputStream;
@ -70,7 +70,7 @@ private:
int table_fd = -1;
std::vector<std::string> paths{""};
std::vector<std::string> paths;
bool is_db_table = true; /// Table is stored in real database, not user's file
bool use_table_fd = false; /// Use table_fd instead of path

View File

@ -155,15 +155,16 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c
const size_t last_slash = full_path.rfind('/');
const String file_name = full_path.substr(last_slash);
const bool looking_for_directory = next_slash != std::string::npos;
const bool is_directory = ls.file_info[i].mKind == 'D';
/// Condition with type of current file_info means what kind of path is it in current iteration of ls
if ((ls.file_info[i].mKind == 'F') && !looking_for_directory)
if (!is_directory && !looking_for_directory)
{
if (re2::RE2::FullMatch(file_name, matcher))
{
result.push_back(String(ls.file_info[i].mName));
}
}
else if ((ls.file_info[i].mKind == 'D') && looking_for_directory)
else if (is_directory && looking_for_directory)
{
if (re2::RE2::FullMatch(file_name, matcher))
{
@ -187,7 +188,6 @@ BlockInputStreams StorageHDFS::read(
size_t max_block_size,
unsigned /*num_streams*/)
{
Strings path_parts;
const size_t begin_of_path = uri.find('/', uri.find("//") + 2);
const String path_from_uri = uri.substr(begin_of_path);
const String uri_without_path = uri.substr(0, begin_of_path);

View File

@ -820,7 +820,7 @@ You can select data from a ClickHouse table and save them into some file in the
clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}
```
To exchange data with the Hadoop, you can use `HDFS` table engine.
To exchange data with Hadoop, you can use the [`HDFS` table engine](../../operations/table_engines/hdfs.md).
## Format Schema {#formatschema}

View File

@ -8,7 +8,7 @@ to the [File](file.md) and [URL](url.md) engine.
```
ENGINE = HDFS(URI, format)
```
The `URI` parameter is the whole file URI in HDFS.
The `format` parameter specifies one of the available file formats. To perform
`SELECT` queries, the format must be supported for input, and to perform
`INSERT` queries -- for output. The available formats are listed in the
@ -21,7 +21,13 @@ The `format` parameter specifies one of the available file formats. To perform
``` sql
CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')
```
**2.** Query the data:
**2.** Fill the file:
``` sql
INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
```
**3.** Query the data:
``` sql
SELECT * FROM hdfs_engine_table LIMIT 2

View File

@ -45,6 +45,7 @@ Engines of the family:
- [MySQL](mysql.md)
- [ODBC](odbc.md)
- [JDBC](jdbc.md)
- [HDFS](hdfs.md)
### Special engines

View File

@ -752,7 +752,7 @@ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parq
clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}
```
Для обмена данными с экосистемой Hadoop можно использовать движки таблиц `HDFS` и `URL`.
Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../../operations/table_engines/hdfs.md) и `URL`.
## Схема формата {#formatschema}

View File

@ -8,6 +8,7 @@
ENGINE = HDFS(URI, format)
```
В параметр `URI` нужно передавать полный URI файла в HDFS.
Параметр `format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT`, и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../interfaces/formats.md#formats).
**Пример:**
@ -18,7 +19,12 @@ ENGINE = HDFS(URI, format)
CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV')
```
**2.** Запросим данные:
**2.** Заполним файл:
``` sql
INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3)
```
**3.** Запросим данные:
``` sql
SELECT * FROM hdfs_engine_table LIMIT 2