mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Merge branch 'master' into revert-revert-drop-cache
This commit is contained in:
commit
7d609ca21d
@ -95,6 +95,7 @@ RUN python3 -m pip install --no-cache-dir \
|
||||
pytest-timeout \
|
||||
pytest-xdist \
|
||||
pytz \
|
||||
pyyaml==5.3.1 \
|
||||
redis \
|
||||
requests-kerberos \
|
||||
tzlocal==2.1 \
|
||||
|
@ -4578,3 +4578,28 @@ Type: Int64
|
||||
|
||||
Default: 0
|
||||
|
||||
## precise_float_parsing {#precise_float_parsing}
|
||||
|
||||
Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms:
|
||||
* If the value is `1`, then the precise method is used. It is slower than the fast method, but it always returns the machine-representable number that is closest to the input.
|
||||
* Otherwise, the fast method is used (default). It usually returns the same value as the precise method, but in rare cases the result may differ by one or two least significant digits.
|
||||
|
||||
Possible values: `0`, `1`.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
|
||||
|
||||
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
|
||||
│ 1.7090999999999998 │ 15008753.000000002 │
|
||||
└─────────────────────┴──────────────────────────┘
|
||||
|
||||
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
|
||||
|
||||
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
|
||||
│ 1.7091 │ 15008753 │
|
||||
└─────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
@ -4213,3 +4213,29 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
|
||||
- Запрос: `SELECT * FROM file('sample.csv')`
|
||||
|
||||
Если чтение и обработка `sample.csv` прошли успешно, файл будет переименован в `processed_sample_1683473210851438.csv`.
|
||||
|
||||
## precise_float_parsing {#precise_float_parsing}
|
||||
|
||||
Позволяет выбрать алгоритм, используемый при парсинге [Float32/Float64](../../sql-reference/data-types/float.md):
|
||||
* Если установлено значение `1`, то используется точный метод. Он более медленный, но всегда возвращает число, наиболее близкое к входному значению.
|
||||
* В противном случае используется быстрый метод (поведение по умолчанию). Обычно результат его работы совпадает с результатом, полученным точным методом, однако в редких случаях он может отличаться на 1 или 2 наименее значимых цифры.
|
||||
|
||||
Возможные значения: `0`, `1`.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
Пример:
|
||||
|
||||
```sql
|
||||
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
|
||||
|
||||
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
|
||||
│ 1.7090999999999998 │ 15008753.000000002 │
|
||||
└─────────────────────┴──────────────────────────┘
|
||||
|
||||
SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
|
||||
|
||||
┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
|
||||
│ 1.7091 │ 15008753 │
|
||||
└─────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
@ -45,6 +45,25 @@ size_t shortest_literal_length(const Literals & literals)
|
||||
return shortest;
|
||||
}
|
||||
|
||||
/// Skips the name part of a named capturing group such as (?<name>...), (?'name'...) or (?P<name>...).
///
/// `pos + offset` must point at the opening delimiter of the name ('<' or '\'');
/// `end` is one past the last character that may be read.
///
/// Returns a pointer to the matching closing delimiter ('>' for '<', otherwise '\'')
/// when every character of the name is an ASCII letter, a digit or an underscore.
/// Returns `pos` unchanged if an invalid name character is found or if `end` is reached
/// before the closing delimiter, so that the caller skips nothing.
const char * skipNameCapturingGroup(const char * pos, size_t offset, const char * end)
{
    const char special = *(pos + offset) == '<' ? '>' : '\'';
    ++offset;
    while (pos + offset < end)
    {
        const char cur = *(pos + offset);
        if (cur == special)
            return pos + offset;

        /// Group names consist of word characters; '_' is valid as well, e.g. (?P<user_id>\d+).
        const bool is_name_char
            = ('0' <= cur && cur <= '9') || ('a' <= cur && cur <= 'z') || ('A' <= cur && cur <= 'Z') || cur == '_';
        if (!is_name_char)
            return pos;

        ++offset;
    }
    /// Closing delimiter not found before `end`: treat as "not a named group".
    return pos;
}
|
||||
|
||||
const char * analyzeImpl(
|
||||
std::string_view regexp,
|
||||
const char * pos,
|
||||
@ -247,10 +266,15 @@ const char * analyzeImpl(
|
||||
break;
|
||||
}
|
||||
}
|
||||
/// (?:regex) means non-capturing parentheses group
|
||||
if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
|
||||
{
|
||||
pos += 2;
|
||||
}
|
||||
if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
|
||||
{
|
||||
pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end);
|
||||
}
|
||||
Literal group_required_substr;
|
||||
bool group_is_trival = true;
|
||||
Literals group_alters;
|
||||
|
@ -47,4 +47,8 @@ TEST(OptimizeRE, analyze)
|
||||
test_f("abc|(:?xx|yy|zz|x?)def", "", {"abc", "def"});
|
||||
test_f("abc|(:?xx|yy|zz|x?){1,2}def", "", {"abc", "def"});
|
||||
test_f(R"(\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z)", "/k8s1");
|
||||
test_f("[a-zA-Z]+(?P<num>\\d+)", "");
|
||||
test_f("[a-zA-Z]+(?<num>\\d+)", "");
|
||||
test_f("[a-zA-Z]+(?'num'\\d+)", "");
|
||||
test_f("[a-zA-Z]+(?x<num>\\d+)", "x<num>");
|
||||
}
|
||||
|
@ -1031,7 +1031,8 @@ class IColumn;
|
||||
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
|
||||
\
|
||||
M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
|
||||
M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
|
||||
M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
|
||||
M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
|
||||
|
||||
// End of FORMAT_FACTORY_SETTINGS
|
||||
// Please add settings non-related to formats into the COMMON_SETTINGS above.
|
||||
|
@ -1040,13 +1040,21 @@ inline void convertFromTime<DataTypeDateTime>(DataTypeDateTime::FieldType & x, t
|
||||
/** Conversion of strings to numbers, dates, datetimes: through parsing.
|
||||
*/
|
||||
template <typename DataType>
|
||||
void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing)
|
||||
{
|
||||
readText(x, rb);
|
||||
if constexpr (std::is_floating_point_v<typename DataType::FieldType>)
|
||||
{
|
||||
if (precise_float_parsing)
|
||||
readFloatTextPrecise(x, rb);
|
||||
else
|
||||
readFloatTextFast(x, rb);
|
||||
}
|
||||
else
|
||||
readText(x, rb);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void parseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
|
||||
inline void parseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
|
||||
{
|
||||
DayNum tmp(0);
|
||||
readDateText(tmp, rb, *time_zone);
|
||||
@ -1054,7 +1062,7 @@ inline void parseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void parseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
|
||||
inline void parseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
|
||||
{
|
||||
ExtendedDayNum tmp(0);
|
||||
readDateText(tmp, rb, *time_zone);
|
||||
@ -1064,7 +1072,7 @@ inline void parseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer
|
||||
|
||||
// NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code.
|
||||
template <>
|
||||
inline void parseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
|
||||
inline void parseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
|
||||
{
|
||||
time_t time = 0;
|
||||
readDateTimeText(time, rb, *time_zone);
|
||||
@ -1072,7 +1080,7 @@ inline void parseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuf
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void parseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
inline void parseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
|
||||
{
|
||||
UUID tmp;
|
||||
readUUIDText(tmp, rb);
|
||||
@ -1080,7 +1088,7 @@ inline void parseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void parseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
inline void parseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
|
||||
{
|
||||
IPv4 tmp;
|
||||
readIPv4Text(tmp, rb);
|
||||
@ -1088,7 +1096,7 @@ inline void parseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer & rb
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void parseImpl<DataTypeIPv6>(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
inline void parseImpl<DataTypeIPv6>(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
|
||||
{
|
||||
IPv6 tmp;
|
||||
readIPv6Text(tmp, rb);
|
||||
@ -1096,16 +1104,21 @@ inline void parseImpl<DataTypeIPv6>(DataTypeIPv6::FieldType & x, ReadBuffer & rb
|
||||
}
|
||||
|
||||
template <typename DataType>
|
||||
bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing)
|
||||
{
|
||||
if constexpr (std::is_floating_point_v<typename DataType::FieldType>)
|
||||
return tryReadFloatText(x, rb);
|
||||
{
|
||||
if (precise_float_parsing)
|
||||
return tryReadFloatTextPrecise(x, rb);
|
||||
else
|
||||
return tryReadFloatTextFast(x, rb);
|
||||
}
|
||||
else /*if constexpr (is_integer_v<typename DataType::FieldType>)*/
|
||||
return tryReadIntText(x, rb);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool tryParseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
|
||||
inline bool tryParseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
|
||||
{
|
||||
DayNum tmp(0);
|
||||
if (!tryReadDateText(tmp, rb, *time_zone))
|
||||
@ -1115,7 +1128,7 @@ inline bool tryParseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer &
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool tryParseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
|
||||
inline bool tryParseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
|
||||
{
|
||||
ExtendedDayNum tmp(0);
|
||||
if (!tryReadDateText(tmp, rb, *time_zone))
|
||||
@ -1125,7 +1138,7 @@ inline bool tryParseImpl<DataTypeDate32>(DataTypeDate32::FieldType & x, ReadBuff
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
|
||||
inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool)
|
||||
{
|
||||
time_t tmp = 0;
|
||||
if (!tryReadDateTimeText(tmp, rb, *time_zone))
|
||||
@ -1135,7 +1148,7 @@ inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, Read
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool tryParseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
inline bool tryParseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
|
||||
{
|
||||
UUID tmp;
|
||||
if (!tryReadUUIDText(tmp, rb))
|
||||
@ -1146,7 +1159,7 @@ inline bool tryParseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer &
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool tryParseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
inline bool tryParseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
|
||||
{
|
||||
IPv4 tmp;
|
||||
if (!tryReadIPv4Text(tmp, rb))
|
||||
@ -1157,7 +1170,7 @@ inline bool tryParseImpl<DataTypeIPv4>(DataTypeIPv4::FieldType & x, ReadBuffer &
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool tryParseImpl<DataTypeIPv6>(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
|
||||
inline bool tryParseImpl<DataTypeIPv6>(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool)
|
||||
{
|
||||
IPv6 tmp;
|
||||
if (!tryReadIPv6Text(tmp, rb))
|
||||
@ -1336,6 +1349,16 @@ struct ConvertThroughParsing
|
||||
|
||||
size_t current_offset = 0;
|
||||
|
||||
bool precise_float_parsing = false;
|
||||
|
||||
if (DB::CurrentThread::isInitialized())
|
||||
{
|
||||
const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();
|
||||
|
||||
if (query_context)
|
||||
precise_float_parsing = query_context->getSettingsRef().precise_float_parsing;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t next_offset = std::is_same_v<FromDataType, DataTypeString> ? (*offsets)[i] : (current_offset + fixed_string_size);
|
||||
@ -1402,7 +1425,7 @@ struct ConvertThroughParsing
|
||||
}
|
||||
}
|
||||
|
||||
parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone);
|
||||
parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
|
||||
} while (false);
|
||||
}
|
||||
}
|
||||
@ -1472,7 +1495,7 @@ struct ConvertThroughParsing
|
||||
}
|
||||
}
|
||||
|
||||
parsed = tryParseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone);
|
||||
parsed = tryParseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
|
||||
} while (false);
|
||||
}
|
||||
}
|
||||
|
@ -529,6 +529,11 @@ void tryReadIntTextUnsafe(T & x, ReadBuffer & buf)
|
||||
template <typename T> void readFloatText(T & x, ReadBuffer & in);
|
||||
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in);
|
||||
|
||||
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in);
|
||||
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in);
|
||||
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in);
|
||||
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in);
|
||||
|
||||
|
||||
/// simple: all until '\n' or '\t'
|
||||
void readString(String & s, ReadBuffer & buf);
|
||||
|
@ -18,7 +18,7 @@ import docker_server as ds
|
||||
|
||||
class TestDockerImageCheck(unittest.TestCase):
|
||||
docker_images_path = os.path.join(
|
||||
os.path.dirname(__file__), "tests/docker_images.json"
|
||||
os.path.dirname(__file__), "tests/docker_images_for_tests.json"
|
||||
)
|
||||
|
||||
def test_get_changed_docker_images(self):
|
||||
|
@ -43,6 +43,7 @@ def get_fake_zk(node):
|
||||
return ku.get_fake_zk(cluster, node)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="test is flaky because changes are not properly waited for")
|
||||
def test_reconfig_replace_leader_in_one_command(started_cluster):
|
||||
"""
|
||||
Remove leader from a cluster of 3 and add a new node to this cluster in a single command
|
||||
|
@ -1 +1,2 @@
|
||||
1
|
||||
1
|
||||
|
@ -1 +1,2 @@
|
||||
select match('default/k8s1', '\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z');
|
||||
select match('abc123', '[a-zA-Z]+(?P<num>\\d+)');
|
||||
|
2
tests/queries/0_stateless/02813_float_parsing.reference
Normal file
2
tests/queries/0_stateless/02813_float_parsing.reference
Normal file
@ -0,0 +1,2 @@
|
||||
1.7090999999999998 15008753.000000002 6.000000000000001e-9 6.000000000000002e-9 1.7091 15008752 5.9999996e-9 5.9999996e-9
|
||||
1.7091 15008753 6e-9 6.000000000000001e-9 1.7091 15008753 6e-9 6e-9
|
21
tests/queries/0_stateless/02813_float_parsing.sql
Normal file
21
tests/queries/0_stateless/02813_float_parsing.sql
Normal file
@ -0,0 +1,21 @@
|
||||
SELECT
|
||||
toFloat64('1.7091'),
|
||||
toFloat64('1.5008753E7'),
|
||||
toFloat64('6e-09'),
|
||||
toFloat64('6.000000000000001e-9'),
|
||||
toFloat32('1.7091'),
|
||||
toFloat32('1.5008753E7'),
|
||||
toFloat32('6e-09'),
|
||||
toFloat32('6.000000000000001e-9')
|
||||
SETTINGS precise_float_parsing = 0;
|
||||
|
||||
SELECT
|
||||
toFloat64('1.7091'),
|
||||
toFloat64('1.5008753E7'),
|
||||
toFloat64('6e-09'),
|
||||
toFloat64('6.000000000000001e-9'),
|
||||
toFloat32('1.7091'),
|
||||
toFloat32('1.5008753E7'),
|
||||
toFloat32('6e-09'),
|
||||
toFloat32('6.000000000000001e-9')
|
||||
SETTINGS precise_float_parsing = 1;
|
Loading…
Reference in New Issue
Block a user