mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 07:01:59 +00:00
Experiment on parsing backslash escapes in a more convenient way #10922
This commit is contained in:
parent
c858f4d89c
commit
b2057159c1
@ -283,7 +283,9 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
|
||||
if (buf.eof())
|
||||
throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
|
||||
|
||||
if (*buf.position() == 'x')
|
||||
char char_after_backslash = *buf.position();
|
||||
|
||||
if (char_after_backslash == 'x')
|
||||
{
|
||||
++buf.position();
|
||||
/// escape sequence of the form \xAA
|
||||
@ -291,7 +293,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
|
||||
readPODBinary(hex_code, buf);
|
||||
s.push_back(unhex2(hex_code));
|
||||
}
|
||||
else if (*buf.position() == 'N')
|
||||
else if (char_after_backslash == 'N')
|
||||
{
|
||||
/// Support for NULLs: \N sequence must be parsed as empty string.
|
||||
++buf.position();
|
||||
@ -299,7 +301,15 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf)
|
||||
else
|
||||
{
|
||||
/// The usual escape sequence of a single character.
|
||||
s.push_back(parseEscapeSequence(*buf.position()));
|
||||
char decoded_char = parseEscapeSequence(char_after_backslash);
|
||||
|
||||
/// For convenience using LIKE and regular expressions,
|
||||
/// we keep the backslash when the user writes something like 'Hello 100\%':
|
||||
/// it is parsed like Hello 100\% instead of Hello 100%
|
||||
if (decoded_char != '\\' && !isControlASCII(decoded_char))
|
||||
s.push_back('\\');
|
||||
|
||||
s.push_back(decoded_char);
|
||||
++buf.position();
|
||||
}
|
||||
}
|
||||
|
@ -37,9 +37,6 @@ target_link_libraries (parse_int_perf2 PRIVATE clickhouse_common_io)
|
||||
add_executable (read_write_int read_write_int.cpp)
|
||||
target_link_libraries (read_write_int PRIVATE clickhouse_common_io)
|
||||
|
||||
add_executable (mempbrk mempbrk.cpp)
|
||||
target_link_libraries (mempbrk PRIVATE clickhouse_common_io)
|
||||
|
||||
add_executable (o_direct_and_dirty_pages o_direct_and_dirty_pages.cpp)
|
||||
target_link_libraries (o_direct_and_dirty_pages PRIVATE clickhouse_common_io)
|
||||
|
||||
|
@ -1,90 +0,0 @@
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <Common/Stopwatch.h>
|
||||
|
||||
#include <Core/Types.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
|
||||
#include <common/find_symbols.h>
|
||||
|
||||
/// Declaration only: the error code constant is defined in another translation unit.
namespace DB::ErrorCodes
{
    extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
}
|
||||
|
||||
|
||||
namespace test
|
||||
{
|
||||
static void readEscapedString(DB::String & s, DB::ReadBuffer & buf)
|
||||
{
|
||||
s = "";
|
||||
while (!buf.eof())
|
||||
{
|
||||
const char * next_pos = find_first_symbols<'\b', '\f', '\n', '\r', '\t', '\0', '\\'>(buf.position(), buf.buffer().end());
|
||||
|
||||
s.append(buf.position(), next_pos - buf.position());
|
||||
buf.position() += next_pos - buf.position();
|
||||
|
||||
if (!buf.hasPendingData())
|
||||
continue;
|
||||
|
||||
if (*buf.position() == '\t' || *buf.position() == '\n')
|
||||
return;
|
||||
|
||||
if (*buf.position() == '\\')
|
||||
{
|
||||
++buf.position();
|
||||
if (buf.eof())
|
||||
throw DB::Exception("Cannot parse escape sequence", DB::ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE);
|
||||
s += DB::parseEscapeSequence(*buf.position());
|
||||
++buf.position();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int, char **)
|
||||
{
|
||||
try
|
||||
{
|
||||
DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
|
||||
// DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);
|
||||
std::string s;
|
||||
size_t rows = 0;
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
test::readEscapedString(s, in);
|
||||
in.ignore();
|
||||
|
||||
++rows;
|
||||
|
||||
/* DB::writeEscapedString(s, out);
|
||||
DB::writeChar('\n', out);*/
|
||||
}
|
||||
|
||||
watch.stop();
|
||||
std::cerr << std::fixed << std::setprecision(2)
|
||||
<< "Read " << rows << " rows (" << in.count() / 1000000.0 << " MB) in " << watch.elapsedSeconds() << " sec., "
|
||||
<< rows / watch.elapsedSeconds() << " rows/sec. (" << in.count() / watch.elapsedSeconds() / 1000000 << " MB/s.)"
|
||||
<< std::endl;
|
||||
}
|
||||
catch (const DB::Exception & e)
|
||||
{
|
||||
std::cerr << e.what() << ", " << e.displayText() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,3 @@
|
||||
a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e
|
||||
1 0 1 1
|
||||
1 1 0 0 0
|
@ -0,0 +1,6 @@
|
||||
SELECT 'a\_\c\l\i\c\k\h\o\u\s\e', 'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e';
|
||||
select 'aXb' like 'a_b', 'aXb' like 'a\_b', 'a_b' like 'a\_b', 'a_b' like 'a\\_b';
|
||||
SELECT match('Hello', '\w+'), match('Hello', '\\w+'), match('Hello', '\\\w+'), match('Hello', '\w\+'), match('Hello', 'w+');
|
||||
|
||||
SELECT match('Hello', '\He\l\l\o'); -- { serverError 427 }
|
||||
SELECT match('Hello', '\H\e\l\l\o'); -- { serverError 427 }
|
Loading…
Reference in New Issue
Block a user