Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-25 00:52:02 +00:00
commit 6c9f5e4991
parent 39c73cb448

    try
@@ -15,6 +15,12 @@ std::pair<bool, size_t> fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D
     while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast<size_t>(pos - in.position()) < min_chunk_size))
     {
+        const auto current_object_size = memory.size() + static_cast<size_t>(pos - in.position());
+        if (current_object_size > 10 * min_chunk_size)
+            throw ParsingException("Size of JSON object is extremely large. Expected not greater than " +
+                std::to_string(min_chunk_size) + " bytes, but current is " + std::to_string(current_object_size) +
+                " bytes. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually", ErrorCodes::INCORRECT_DATA);
+
         if (quotes)
         {
             pos = find_first_symbols<'\\', '"'>(pos, in.buffer().end());
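For context, a minimal self-contained sketch of the guard this hunk introduces. The 10x factor and the message text come from the diff above; ReadBuffer, ParsingException and ErrorCodes::INCORRECT_DATA are ClickHouse types and are replaced here with standard C++ equivalents, and the chunk size is an arbitrary example value, not the ClickHouse default.

// Sketch only: mirrors the added check, not the actual ClickHouse implementation.
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>

void checkObjectSize(std::size_t current_object_size, std::size_t min_chunk_size)
{
    // Refuse to keep buffering a single JSON object once it exceeds 10 * min_chunk_size.
    if (current_object_size > 10 * min_chunk_size)
        throw std::runtime_error(
            "Size of JSON object is extremely large. Expected not greater than "
            + std::to_string(min_chunk_size) + " bytes, but current is "
            + std::to_string(current_object_size)
            + " bytes. Increase the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually");
}

int main()
{
    const std::size_t min_chunk_size = 1024 * 1024; // example value only

    checkObjectSize(5 * min_chunk_size, min_chunk_size);      // below the limit: no exception
    try
    {
        checkObjectSize(11 * min_chunk_size, min_chunk_size); // above the limit: throws
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
    return 0;
}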
@@ -1104,9 +1104,9 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current)
     assert(current >= in.position());
     assert(current <= in.buffer().end());
 
-    const int old_bytes = memory.size();
-    const int additional_bytes = current - in.position();
-    const int new_bytes = old_bytes + additional_bytes;
+    const size_t old_bytes = memory.size();
+    const size_t additional_bytes = current - in.position();
+    const size_t new_bytes = old_bytes + additional_bytes;
     /// There are no new bytes to add to memory.
     /// No need to do extra stuff.
     if (new_bytes == 0)
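An illustrative sketch of why the int -> size_t change matters here: memory.size() returns size_t, and once the buffered segment grows past INT_MAX, storing the byte counts in int truncates them (and the addition can overflow), while size_t keeps the arithmetic well defined. The sizes below are made up for the example and assume a 64-bit size_t.

// Illustrative only; values are hypothetical, not from the commit.
#include <climits>
#include <cstddef>
#include <iostream>

int main()
{
    const std::size_t old_bytes = std::size_t{3} * 1024 * 1024 * 1024;    // 3 GiB already saved in memory
    const std::size_t additional_bytes = std::size_t{512} * 1024 * 1024;  // 512 MiB between in.position() and current
    const std::size_t new_bytes = old_bytes + additional_bytes;           // well defined, no wrap-around

    std::cout << "new_bytes = " << new_bytes << '\n';                                           // 3758096384
    std::cout << "fits in int: " << (new_bytes <= static_cast<std::size_t>(INT_MAX)) << '\n';   // 0: an int would truncate
    return 0;
}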
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+import os
+import sys
+import random
+import pandas as pd
+import numpy as np
+
+CURDIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(CURDIR, 'helpers'))
+
+from pure_http_client import ClickHouseClient
+
+
@@ -0,0 +1 @@
+Ok.
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+python3 -c "print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 10000000, 'dbms' * 100000000))" > big_json.json
+
+clickhouse-local --input_format_parallel_parsing=1 --max_memory_usage=0 -q "select count() from file('big_json.json', 'JSONEachRow', 'a String, b String')" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||:
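For reference, the generated big_json.json holds a single row whose two string fields are roughly 100 MB ('clickhouse' repeated 10,000,000 times) and 400 MB ('dbms' repeated 100,000,000 times), so the object is far larger than 10 * min_chunk_bytes_for_parallel_parsing and clickhouse-local is expected to fail with the new error; the grep on 'min_chunk_bytes_for_parallel_parsing' turns that failure into the "Ok." expected by the reference file above.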