mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge branch 'master' into rocksdb_bypass_memtable
Try fixing weird CI errors
This commit is contained in:
commit
adad010345
2
.github/workflows/jepsen.yml
vendored
2
.github/workflows/jepsen.yml
vendored
@ -8,13 +8,13 @@ on: # yamllint disable-line rule:truthy
|
||||
schedule:
|
||||
- cron: '0 */6 * * *'
|
||||
workflow_dispatch:
|
||||
workflow_call:
|
||||
jobs:
|
||||
KeeperJepsenRelease:
|
||||
uses: ./.github/workflows/reusable_simple_job.yml
|
||||
with:
|
||||
test_name: Jepsen keeper check
|
||||
runner_type: style-checker
|
||||
report_required: true
|
||||
run_command: |
|
||||
python3 jepsen_check.py keeper
|
||||
# ServerJepsenRelease:
|
||||
|
10
.github/workflows/master.yml
vendored
10
.github/workflows/master.yml
vendored
@ -15,6 +15,8 @@ jobs:
|
||||
outputs:
|
||||
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
||||
steps:
|
||||
- name: DebugInfo
|
||||
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
@ -33,11 +35,9 @@ jobs:
|
||||
- name: PrepareRunConfig
|
||||
id: runconfig
|
||||
run: |
|
||||
echo "::group::configure CI run"
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --rebuild-all-binaries --outfile ${{ runner.temp }}/ci_run_data.json
|
||||
echo "::endgroup::"
|
||||
|
||||
echo "::group::CI run configure results"
|
||||
echo "::group::CI configuration"
|
||||
python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
|
||||
echo "::endgroup::"
|
||||
|
||||
@ -255,9 +255,9 @@ jobs:
|
||||
run_command: |
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
python3 docker_server.py --release-type head \
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server
|
||||
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
|
||||
python3 docker_server.py --release-type head \
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
|
||||
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
|
||||
############################################################################################
|
||||
##################################### BUILD REPORTER #######################################
|
||||
############################################################################################
|
||||
|
28
.github/workflows/pull_request.yml
vendored
28
.github/workflows/pull_request.yml
vendored
@ -22,6 +22,8 @@ jobs:
|
||||
outputs:
|
||||
data: ${{ steps.runconfig.outputs.CI_DATA }}
|
||||
steps:
|
||||
- name: DebugInfo
|
||||
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
@ -44,11 +46,9 @@ jobs:
|
||||
- name: PrepareRunConfig
|
||||
id: runconfig
|
||||
run: |
|
||||
echo "::group::configure CI run"
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --configure --outfile ${{ runner.temp }}/ci_run_data.json
|
||||
echo "::endgroup::"
|
||||
|
||||
echo "::group::CI run configure results"
|
||||
echo "::group::CI configuration"
|
||||
python3 -m json.tool ${{ runner.temp }}/ci_run_data.json
|
||||
echo "::endgroup::"
|
||||
|
||||
@ -67,6 +67,7 @@ jobs:
|
||||
DOCKER_TAG=$(echo '${{ toJson(fromJson(steps.runconfig.outputs.CI_DATA).docker_data.images) }}' | tr -d '\n')
|
||||
export DOCKER_TAG=$DOCKER_TAG
|
||||
python3 ./tests/ci/style_check.py --no-push
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --post --job-name 'Style check'
|
||||
BuildDockers:
|
||||
needs: [RunConfig]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
@ -796,7 +797,7 @@ jobs:
|
||||
test_name: Unit tests (asan)
|
||||
runner_type: fuzzer-unit-tester
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
UnitTestsReleaseClang:
|
||||
UnitTestsRelease:
|
||||
needs: [RunConfig, BuilderBinRelease]
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
@ -923,7 +924,7 @@ jobs:
|
||||
- UnitTestsTsan
|
||||
- UnitTestsMsan
|
||||
- UnitTestsUBsan
|
||||
- UnitTestsReleaseClang
|
||||
- UnitTestsRelease
|
||||
- CompatibilityCheckX86
|
||||
- CompatibilityCheckAarch64
|
||||
- SQLancerTestRelease
|
||||
@ -966,13 +967,20 @@ jobs:
|
||||
#############################################################################################
|
||||
###################################### JEPSEN TESTS #########################################
|
||||
#############################################################################################
|
||||
# This is special test NOT INCLUDED in FinishCheck
|
||||
# When it's skipped, all dependent tasks will be skipped too.
|
||||
# DO NOT add it there
|
||||
Jepsen:
|
||||
# This is special test NOT INCLUDED in FinishCheck
|
||||
# When it's skipped, all dependent tasks will be skipped too.
|
||||
# DO NOT add it there
|
||||
if: ${{ !failure() && !cancelled() && contains(github.event.pull_request.labels.*.name, 'jepsen-test') }}
|
||||
# we need concurrency as the job uses dedicated instances in the cloud
|
||||
concurrency:
|
||||
group: jepsen
|
||||
if: ${{ !failure() && !cancelled() }}
|
||||
needs: [RunConfig, BuilderBinRelease]
|
||||
uses: ./.github/workflows/jepsen.yml
|
||||
uses: ./.github/workflows/reusable_test.yml
|
||||
with:
|
||||
test_name: ClickHouse Keeper Jepsen
|
||||
runner_type: style-checker
|
||||
data: ${{ needs.RunConfig.outputs.data }}
|
||||
#############################################################################################
|
||||
####################################### libFuzzer ###########################################
|
||||
#############################################################################################
|
||||
|
10
.github/workflows/reusable_build.yml
vendored
10
.github/workflows/reusable_build.yml
vendored
@ -58,6 +58,7 @@ jobs:
|
||||
- name: Apply sparse checkout for contrib # in order to check that it doesn't break build
|
||||
# This step is done in GITHUB_WORKSPACE,
|
||||
# because it's broken in REPO_COPY for some reason
|
||||
# See also update-submodules.sh
|
||||
if: ${{ env.BUILD_SPARSE_CHECKOUT == 'true' }}
|
||||
run: |
|
||||
rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
|
||||
@ -72,12 +73,15 @@ jobs:
|
||||
- name: Pre
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.build_name}}'
|
||||
- name: Build
|
||||
- name: Run
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/build_check.py" "$BUILD_NAME"
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \
|
||||
--infile ${{ toJson(inputs.data) }} \
|
||||
--job-name "$BUILD_NAME" \
|
||||
--run
|
||||
- name: Post
|
||||
# it still be build report to upload for failed build job
|
||||
if: always()
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.build_name}}'
|
||||
- name: Mark as done
|
||||
|
18
.github/workflows/reusable_simple_job.yml
vendored
18
.github/workflows/reusable_simple_job.yml
vendored
@ -34,12 +34,16 @@ name: Simple job
|
||||
working-directory:
|
||||
description: sets custom working directory
|
||||
type: string
|
||||
default: ""
|
||||
default: "$GITHUB_WORKSPACE/tests/ci"
|
||||
git_ref:
|
||||
description: commit to use, merge commit for pr or head
|
||||
required: false
|
||||
type: string
|
||||
default: ${{ github.event.after }} # no merge commit
|
||||
report_required:
|
||||
description: set to true if job report with the commit status required
|
||||
type: boolean
|
||||
default: false
|
||||
secrets:
|
||||
secret_envs:
|
||||
description: if given, it's passed to the environments
|
||||
@ -58,6 +62,8 @@ jobs:
|
||||
env:
|
||||
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
|
||||
steps:
|
||||
- name: DebugInfo
|
||||
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
@ -79,12 +85,12 @@ jobs:
|
||||
job_type: test
|
||||
- name: Run
|
||||
run: |
|
||||
if [ -n '${{ inputs.working-directory }}' ]; then
|
||||
cd "${{ inputs.working-directory }}"
|
||||
else
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
fi
|
||||
cd "${{ inputs.working-directory }}"
|
||||
${{ inputs.run_command }}
|
||||
- name: Post
|
||||
if: ${{ inputs.report_required }}
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --post --job-name '${{inputs.test_name}}'
|
||||
- name: Clean
|
||||
if: always()
|
||||
uses: ./.github/actions/clean
|
||||
|
21
.github/workflows/reusable_test.yml
vendored
21
.github/workflows/reusable_test.yml
vendored
@ -38,7 +38,7 @@ name: Testing workflow
|
||||
working-directory:
|
||||
description: sets custom working directory
|
||||
type: string
|
||||
default: ""
|
||||
default: "$GITHUB_WORKSPACE/tests/ci"
|
||||
secrets:
|
||||
secret_envs:
|
||||
description: if given, it's passed to the environments
|
||||
@ -96,19 +96,14 @@ jobs:
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --pre --job-name '${{inputs.test_name}}'
|
||||
- name: Run
|
||||
run: |
|
||||
if [ -n "${{ inputs.working-directory }}" ]; then
|
||||
cd "${{ inputs.working-directory }}"
|
||||
else
|
||||
cd "$GITHUB_WORKSPACE/tests/ci"
|
||||
fi
|
||||
if [ -n "$(echo '${{ inputs.run_command }}' | tr -d '\n')" ]; then
|
||||
echo "Running command from workflow input"
|
||||
${{ inputs.run_command }}
|
||||
else
|
||||
echo "Running command from job config"
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --run --job-name '${{inputs.test_name}}'
|
||||
fi
|
||||
cd "${{ inputs.working-directory }}"
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" \
|
||||
--infile ${{ toJson(inputs.data) }} \
|
||||
--job-name '${{inputs.test_name}}' \
|
||||
--run \
|
||||
--run-command '''${{inputs.run_command}}'''
|
||||
- name: Post run
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ toJson(inputs.data) }} --post --job-name '${{inputs.test_name}}'
|
||||
- name: Mark as done
|
||||
|
15
.gitmessage
15
.gitmessage
@ -1,9 +1,18 @@
|
||||
|
||||
|
||||
## To avoid merge commit in CI run (add a leading space to apply):
|
||||
#no-merge-commit
|
||||
### CI modificators (add a leading space to apply):
|
||||
|
||||
## Running specified job (add a leading space to apply):
|
||||
## To avoid a merge commit in CI:
|
||||
#no_merge_commit
|
||||
|
||||
## To discard CI cache:
|
||||
#no_ci_cache
|
||||
|
||||
## To run specified set of tests in CI:
|
||||
#ci_set_<SET_NAME>
|
||||
#ci_set_reduced
|
||||
|
||||
## To run specified job in CI:
|
||||
#job_<JOB NAME>
|
||||
#job_stateless_tests_release
|
||||
#job_package_debug
|
||||
|
@ -99,7 +99,7 @@ public:
|
||||
};
|
||||
}
|
||||
|
||||
constexpr DB::UInt64 max_uint_mask = std::numeric_limits<DB::UInt64>::max();
|
||||
constexpr UInt64 max_uint_mask = std::numeric_limits<UInt64>::max();
|
||||
|
||||
namespace std
|
||||
{
|
||||
@ -114,8 +114,8 @@ namespace std
|
||||
{
|
||||
size_t operator()(const DB::Decimal128 & x) const
|
||||
{
|
||||
return std::hash<DB::Int64>()(x.value >> 64)
|
||||
^ std::hash<DB::Int64>()(x.value & max_uint_mask);
|
||||
return std::hash<Int64>()(x.value >> 64)
|
||||
^ std::hash<Int64>()(x.value & max_uint_mask);
|
||||
}
|
||||
};
|
||||
|
||||
@ -134,8 +134,8 @@ namespace std
|
||||
size_t operator()(const DB::Decimal256 & x) const
|
||||
{
|
||||
// FIXME temp solution
|
||||
return std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value >> 64 & max_uint_mask))
|
||||
^ std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value & max_uint_mask));
|
||||
return std::hash<Int64>()(static_cast<Int64>(x.value >> 64 & max_uint_mask))
|
||||
^ std::hash<Int64>()(static_cast<Int64>(x.value & max_uint_mask));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -3,15 +3,6 @@
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
using Int8 = int8_t;
|
||||
using Int16 = int16_t;
|
||||
using Int32 = int32_t;
|
||||
using Int64 = int64_t;
|
||||
|
||||
#ifndef __cpp_char8_t
|
||||
using char8_t = unsigned char;
|
||||
#endif
|
||||
|
||||
/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
|
||||
using UInt8 = char8_t;
|
||||
|
||||
@ -19,24 +10,12 @@ using UInt16 = uint16_t;
|
||||
using UInt32 = uint32_t;
|
||||
using UInt64 = uint64_t;
|
||||
|
||||
using String = std::string;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using UInt8 = ::UInt8;
|
||||
using UInt16 = ::UInt16;
|
||||
using UInt32 = ::UInt32;
|
||||
using UInt64 = ::UInt64;
|
||||
|
||||
using Int8 = ::Int8;
|
||||
using Int16 = ::Int16;
|
||||
using Int32 = ::Int32;
|
||||
using Int64 = ::Int64;
|
||||
using Int8 = int8_t;
|
||||
using Int16 = int16_t;
|
||||
using Int32 = int32_t;
|
||||
using Int64 = int64_t;
|
||||
|
||||
using Float32 = float;
|
||||
using Float64 = double;
|
||||
|
||||
using String = std::string;
|
||||
|
||||
}
|
||||
|
@ -26,6 +26,11 @@
|
||||
#include "Poco/StreamUtil.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class ReadBufferFromIStream;
|
||||
}
|
||||
|
||||
namespace Poco
|
||||
{
|
||||
|
||||
@ -120,6 +125,8 @@ protected:
|
||||
openmode getMode() const { return _mode; }
|
||||
|
||||
private:
|
||||
friend class DB::ReadBufferFromIStream;
|
||||
|
||||
virtual int readFromDevice(char_type * /*buffer*/, std::streamsize /*length*/) { return 0; }
|
||||
|
||||
virtual int writeToDevice(const char_type * /*buffer*/, std::streamsize /*length*/) { return 0; }
|
||||
|
@ -33,7 +33,8 @@ namespace Poco
|
||||
|
||||
|
||||
class Exception;
|
||||
|
||||
class Logger;
|
||||
using LoggerPtr = std::shared_ptr<Logger>;
|
||||
|
||||
class Foundation_API Logger : public Channel
|
||||
/// Logger is a special Channel that acts as the main
|
||||
@ -870,6 +871,11 @@ public:
|
||||
/// If the Logger does not yet exist, it is created, based
|
||||
/// on its parent logger.
|
||||
|
||||
static LoggerPtr getShared(const std::string & name);
|
||||
/// Returns a shared pointer to the Logger with the given name.
|
||||
/// If the Logger does not yet exist, it is created, based
|
||||
/// on its parent logger.
|
||||
|
||||
static Logger & unsafeGet(const std::string & name);
|
||||
/// Returns a reference to the Logger with the given name.
|
||||
/// If the Logger does not yet exist, it is created, based
|
||||
@ -885,6 +891,11 @@ public:
|
||||
/// given name. The Logger's Channel and log level as set as
|
||||
/// specified.
|
||||
|
||||
static LoggerPtr createShared(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
|
||||
/// Creates and returns a shared pointer to a Logger with the
|
||||
/// given name. The Logger's Channel and log level as set as
|
||||
/// specified.
|
||||
|
||||
static Logger & root();
|
||||
/// Returns a reference to the root logger, which is the ultimate
|
||||
/// ancestor of all Loggers.
|
||||
@ -893,7 +904,7 @@ public:
|
||||
/// Returns a pointer to the Logger with the given name if it
|
||||
/// exists, or a null pointer otherwise.
|
||||
|
||||
static void destroy(const std::string & name);
|
||||
static bool destroy(const std::string & name);
|
||||
/// Destroys the logger with the specified name. Does nothing
|
||||
/// if the logger is not found.
|
||||
///
|
||||
@ -938,6 +949,7 @@ protected:
|
||||
void log(const std::string & text, Message::Priority prio, const char * file, int line);
|
||||
|
||||
static std::string format(const std::string & fmt, int argc, std::string argv[]);
|
||||
static Logger & unsafeCreate(const std::string & name, Channel * pChannel, int level = Message::PRIO_INFORMATION);
|
||||
static Logger & parent(const std::string & name);
|
||||
static void add(Logger * pLogger);
|
||||
static Logger * find(const std::string & name);
|
||||
@ -952,7 +964,6 @@ private:
|
||||
std::atomic_int _level;
|
||||
|
||||
static LoggerMap * _pLoggerMap;
|
||||
static Mutex _mapMtx;
|
||||
};
|
||||
|
||||
|
||||
|
@ -38,15 +38,15 @@ public:
|
||||
/// Creates the RefCountedObject.
|
||||
/// The initial reference count is one.
|
||||
|
||||
void duplicate() const;
|
||||
/// Increments the object's reference count.
|
||||
size_t duplicate() const;
|
||||
/// Increments the object's reference count, returns reference count before call.
|
||||
|
||||
void release() const throw();
|
||||
size_t release() const throw();
|
||||
/// Decrements the object's reference count
|
||||
/// and deletes the object if the count
|
||||
/// reaches zero.
|
||||
/// reaches zero, returns reference count before call.
|
||||
|
||||
int referenceCount() const;
|
||||
size_t referenceCount() const;
|
||||
/// Returns the reference count.
|
||||
|
||||
protected:
|
||||
@ -57,36 +57,40 @@ private:
|
||||
RefCountedObject(const RefCountedObject &);
|
||||
RefCountedObject & operator=(const RefCountedObject &);
|
||||
|
||||
mutable AtomicCounter _counter;
|
||||
mutable std::atomic<size_t> _counter;
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// inlines
|
||||
//
|
||||
inline int RefCountedObject::referenceCount() const
|
||||
inline size_t RefCountedObject::referenceCount() const
|
||||
{
|
||||
return _counter.value();
|
||||
return _counter.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
|
||||
inline void RefCountedObject::duplicate() const
|
||||
inline size_t RefCountedObject::duplicate() const
|
||||
{
|
||||
++_counter;
|
||||
return _counter.fetch_add(1, std::memory_order_acq_rel);
|
||||
}
|
||||
|
||||
|
||||
inline void RefCountedObject::release() const throw()
|
||||
inline size_t RefCountedObject::release() const throw()
|
||||
{
|
||||
size_t reference_count_before = _counter.fetch_sub(1, std::memory_order_acq_rel);
|
||||
|
||||
try
|
||||
{
|
||||
if (--_counter == 0)
|
||||
if (reference_count_before == 1)
|
||||
delete this;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
poco_unexpected();
|
||||
}
|
||||
|
||||
return reference_count_before;
|
||||
}
|
||||
|
||||
|
||||
|
@ -20,12 +20,29 @@
|
||||
#include "Poco/NumberParser.h"
|
||||
#include "Poco/String.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
std::mutex & getLoggerMutex()
|
||||
{
|
||||
auto get_logger_mutex_placeholder_memory = []()
|
||||
{
|
||||
static char buffer[sizeof(std::mutex)]{};
|
||||
return buffer;
|
||||
};
|
||||
|
||||
static std::mutex * logger_mutex = new (get_logger_mutex_placeholder_memory()) std::mutex();
|
||||
return *logger_mutex;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace Poco {
|
||||
|
||||
|
||||
Logger::LoggerMap* Logger::_pLoggerMap = 0;
|
||||
Mutex Logger::_mapMtx;
|
||||
const std::string Logger::ROOT;
|
||||
|
||||
|
||||
@ -73,7 +90,7 @@ void Logger::setProperty(const std::string& name, const std::string& value)
|
||||
setChannel(LoggingRegistry::defaultRegistry().channelForName(value));
|
||||
else if (name == "level")
|
||||
setLevel(value);
|
||||
else
|
||||
else
|
||||
Channel::setProperty(name, value);
|
||||
}
|
||||
|
||||
@ -112,14 +129,14 @@ void Logger::dump(const std::string& msg, const void* buffer, std::size_t length
|
||||
|
||||
void Logger::setLevel(const std::string& name, int level)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
if (_pLoggerMap)
|
||||
{
|
||||
std::string::size_type len = name.length();
|
||||
for (LoggerMap::iterator it = _pLoggerMap->begin(); it != _pLoggerMap->end(); ++it)
|
||||
{
|
||||
if (len == 0 ||
|
||||
if (len == 0 ||
|
||||
(it->first.compare(0, len, name) == 0 && (it->first.length() == len || it->first[len] == '.')))
|
||||
{
|
||||
it->second->setLevel(level);
|
||||
@ -131,7 +148,7 @@ void Logger::setLevel(const std::string& name, int level)
|
||||
|
||||
void Logger::setChannel(const std::string& name, Channel* pChannel)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
if (_pLoggerMap)
|
||||
{
|
||||
@ -150,7 +167,7 @@ void Logger::setChannel(const std::string& name, Channel* pChannel)
|
||||
|
||||
void Logger::setProperty(const std::string& loggerName, const std::string& propertyName, const std::string& value)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
if (_pLoggerMap)
|
||||
{
|
||||
@ -280,13 +297,41 @@ void Logger::formatDump(std::string& message, const void* buffer, std::size_t le
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct LoggerDeleter
|
||||
{
|
||||
void operator()(Poco::Logger * logger)
|
||||
{
|
||||
if (Logger::destroy(logger->name()))
|
||||
return;
|
||||
|
||||
logger->release();
|
||||
}
|
||||
};
|
||||
|
||||
inline LoggerPtr makeLoggerPtr(Logger & logger)
|
||||
{
|
||||
logger.duplicate();
|
||||
return std::shared_ptr<Logger>(&logger, LoggerDeleter());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Logger& Logger::get(const std::string& name)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
return unsafeGet(name);
|
||||
}
|
||||
|
||||
LoggerPtr Logger::getShared(const std::string & name)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
return makeLoggerPtr(unsafeGet(name));
|
||||
}
|
||||
|
||||
Logger& Logger::unsafeGet(const std::string& name)
|
||||
{
|
||||
@ -310,18 +355,21 @@ Logger& Logger::unsafeGet(const std::string& name)
|
||||
|
||||
Logger& Logger::create(const std::string& name, Channel* pChannel, int level)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
if (find(name)) throw ExistsException();
|
||||
Logger* pLogger = new Logger(name, pChannel, level);
|
||||
add(pLogger);
|
||||
return *pLogger;
|
||||
return unsafeCreate(name, pChannel, level);
|
||||
}
|
||||
|
||||
LoggerPtr Logger::createShared(const std::string & name, Channel * pChannel, int level)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
return makeLoggerPtr(unsafeCreate(name, pChannel, level));
|
||||
}
|
||||
|
||||
Logger& Logger::root()
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
return unsafeGet(ROOT);
|
||||
}
|
||||
@ -329,7 +377,7 @@ Logger& Logger::root()
|
||||
|
||||
Logger* Logger::has(const std::string& name)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
return find(name);
|
||||
}
|
||||
@ -337,7 +385,7 @@ Logger* Logger::has(const std::string& name)
|
||||
|
||||
void Logger::shutdown()
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
if (_pLoggerMap)
|
||||
{
|
||||
@ -363,25 +411,29 @@ Logger* Logger::find(const std::string& name)
|
||||
}
|
||||
|
||||
|
||||
void Logger::destroy(const std::string& name)
|
||||
bool Logger::destroy(const std::string& name)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
if (_pLoggerMap)
|
||||
{
|
||||
LoggerMap::iterator it = _pLoggerMap->find(name);
|
||||
if (it != _pLoggerMap->end())
|
||||
{
|
||||
it->second->release();
|
||||
_pLoggerMap->erase(it);
|
||||
if (it->second->release() == 1)
|
||||
_pLoggerMap->erase(it);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void Logger::names(std::vector<std::string>& names)
|
||||
{
|
||||
Mutex::ScopedLock lock(_mapMtx);
|
||||
std::lock_guard<std::mutex> lock(getLoggerMutex());
|
||||
|
||||
names.clear();
|
||||
if (_pLoggerMap)
|
||||
@ -393,6 +445,14 @@ void Logger::names(std::vector<std::string>& names)
|
||||
}
|
||||
}
|
||||
|
||||
Logger& Logger::unsafeCreate(const std::string & name, Channel * pChannel, int level)
|
||||
{
|
||||
if (find(name)) throw ExistsException();
|
||||
Logger* pLogger = new Logger(name, pChannel, level);
|
||||
add(pLogger);
|
||||
|
||||
return *pLogger;
|
||||
}
|
||||
|
||||
Logger& Logger::parent(const std::string& name)
|
||||
{
|
||||
|
@ -82,3 +82,4 @@ if (SANITIZE_COVERAGE)
|
||||
endif()
|
||||
|
||||
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")
|
||||
set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table)
|
||||
|
2
contrib/NuRaft
vendored
2
contrib/NuRaft
vendored
@ -1 +1 @@
|
||||
Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04
|
||||
Subproject commit 1278e32bb0d5dc489f947e002bdf8c71b0ddaa63
|
2
contrib/avro
vendored
2
contrib/avro
vendored
@ -1 +1 @@
|
||||
Subproject commit 2fb8a8a6ec0eab9109b68abf3b4857e8c476b918
|
||||
Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9
|
2
contrib/azure
vendored
2
contrib/azure
vendored
@ -1 +1 @@
|
||||
Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3
|
||||
Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2
|
@ -44,12 +44,14 @@ set (SRCS_IOSTREAMS
|
||||
"${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp"
|
||||
"${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp"
|
||||
"${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp"
|
||||
"${LIBRARY_DIR}/libs/iostreams/src/zstd.cpp"
|
||||
)
|
||||
|
||||
add_library (_boost_iostreams ${SRCS_IOSTREAMS})
|
||||
add_library (boost::iostreams ALIAS _boost_iostreams)
|
||||
target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR})
|
||||
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zlib)
|
||||
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zstd)
|
||||
|
||||
# program_options
|
||||
|
||||
|
@ -34,9 +34,9 @@ if (OS_LINUX)
|
||||
# avoid spurious latencies and additional work associated with
|
||||
# MADV_DONTNEED. See
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
|
||||
else()
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
|
||||
endif()
|
||||
# CACHE variable is empty to allow changing defaults without the necessity
|
||||
# to purge cache
|
||||
@ -161,6 +161,9 @@ target_include_directories(_jemalloc SYSTEM PRIVATE
|
||||
|
||||
target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)
|
||||
|
||||
# Because our coverage callbacks call malloc, and recursive call of malloc could not work.
|
||||
target_compile_options(_jemalloc PRIVATE ${WITHOUT_COVERAGE_FLAGS_LIST})
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
|
||||
target_compile_definitions(_jemalloc PRIVATE
|
||||
-DJEMALLOC_DEBUG=1
|
||||
|
@ -33,7 +33,6 @@ set(SRCS
|
||||
"${LIBCXX_SOURCE_DIR}/src/optional.cpp"
|
||||
"${LIBCXX_SOURCE_DIR}/src/random.cpp"
|
||||
"${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
|
||||
"${LIBCXX_SOURCE_DIR}/src/regex.cpp"
|
||||
"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
|
||||
"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
|
||||
"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"
|
||||
|
2
contrib/llvm-project
vendored
2
contrib/llvm-project
vendored
@ -1 +1 @@
|
||||
Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec
|
||||
Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856
|
2
contrib/rocksdb
vendored
2
contrib/rocksdb
vendored
@ -1 +1 @@
|
||||
Subproject commit 66e3cbec31400ed3a23deb878c5d7f56f990f0ae
|
||||
Subproject commit dead55e60b873d5f70f0e9458fbbba2b2180f430
|
6
contrib/update-submodules.sh
vendored
6
contrib/update-submodules.sh
vendored
@ -6,9 +6,15 @@ SCRIPT_DIR=$(dirname "${SCRIPT_PATH}")
|
||||
GIT_DIR=$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)
|
||||
cd $GIT_DIR
|
||||
|
||||
# Exclude from contribs some garbage subdirs that we don't need.
|
||||
# It reduces the checked out files size about 3 times and therefore speeds up indexing in IDEs and searching.
|
||||
# NOTE .git/ still contains everything that we don't check out (although, it's compressed)
|
||||
# See also https://git-scm.com/docs/git-sparse-checkout
|
||||
contrib/sparse-checkout/setup-sparse-checkout.sh
|
||||
|
||||
git submodule init
|
||||
git submodule sync
|
||||
|
||||
# NOTE: do not use --remote for `git submodule update`[1] command, since the submodule references to the specific commit SHA1 in the subproject.
|
||||
# It may cause unexpected behavior. Instead you need to commit a new SHA1 for a submodule.
|
||||
#
|
||||
|
@ -49,17 +49,10 @@ CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
|
||||
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
|
||||
CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}"
|
||||
|
||||
for dir in "$DATA_DIR" \
|
||||
"$ERROR_LOG_DIR" \
|
||||
"$LOG_DIR" \
|
||||
"$TMP_DIR" \
|
||||
"$USER_PATH" \
|
||||
"$FORMAT_SCHEMA_PATH" \
|
||||
"${DISKS_PATHS[@]}" \
|
||||
"${DISKS_METADATA_PATHS[@]}"
|
||||
do
|
||||
function create_directory_and_do_chown() {
|
||||
local dir=$1
|
||||
# check if variable not empty
|
||||
[ -z "$dir" ] && continue
|
||||
[ -z "$dir" ] && return
|
||||
# ensure directories exist
|
||||
if [ "$DO_CHOWN" = "1" ]; then
|
||||
mkdir="mkdir"
|
||||
@ -81,6 +74,23 @@ do
|
||||
chown -R "$USER:$GROUP" "$dir"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
create_directory_and_do_chown "$DATA_DIR"
|
||||
|
||||
# Change working directory to $DATA_DIR in case there're paths relative to $DATA_DIR, also avoids running
|
||||
# clickhouse-server at root directory.
|
||||
cd "$DATA_DIR"
|
||||
|
||||
for dir in "$ERROR_LOG_DIR" \
|
||||
"$LOG_DIR" \
|
||||
"$TMP_DIR" \
|
||||
"$USER_PATH" \
|
||||
"$FORMAT_SCHEMA_PATH" \
|
||||
"${DISKS_PATHS[@]}" \
|
||||
"${DISKS_METADATA_PATHS[@]}"
|
||||
do
|
||||
create_directory_and_do_chown "$dir"
|
||||
done
|
||||
|
||||
# if clickhouse user is defined - create it (user "default" already exists out of box)
|
||||
|
@ -242,7 +242,7 @@ quit
|
||||
--create-query-fuzzer-runs=50 \
|
||||
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
|
||||
$NEW_TESTS_OPT \
|
||||
> >(tail -n 100000 > fuzzer.log) \
|
||||
> fuzzer.log \
|
||||
2>&1 &
|
||||
fuzzer_pid=$!
|
||||
echo "Fuzzer pid is $fuzzer_pid"
|
||||
@ -390,6 +390,7 @@ rg --text -F '<Fatal>' server.log > fatal.log ||:
|
||||
dmesg -T > dmesg.log ||:
|
||||
|
||||
zstd --threads=0 server.log
|
||||
zstd --threads=0 fuzzer.log
|
||||
|
||||
cat > report.html <<EOF ||:
|
||||
<!DOCTYPE html>
|
||||
@ -413,7 +414,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
|
||||
<h1>AST Fuzzer for PR <a href="https://github.com/ClickHouse/ClickHouse/pull/${PR_TO_TEST}">#${PR_TO_TEST}</a> @ ${SHA_TO_TEST}</h1>
|
||||
<p class="links">
|
||||
<a href="run.log">run.log</a>
|
||||
<a href="fuzzer.log">fuzzer.log</a>
|
||||
<a href="fuzzer.log.zst">fuzzer.log.zst</a>
|
||||
<a href="server.log.zst">server.log.zst</a>
|
||||
<a href="main.log">main.log</a>
|
||||
<a href="dmesg.log">dmesg.log</a>
|
||||
|
@ -11,14 +11,6 @@ RUN apt-get update -y \
|
||||
npm \
|
||||
&& apt-get clean
|
||||
|
||||
COPY s3downloader /s3downloader
|
||||
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
|
||||
ENV DATASETS="hits visits"
|
||||
|
||||
# The following is already done in clickhouse/stateless-test
|
||||
# RUN npm install -g azurite
|
||||
# RUN npm install tslib
|
||||
|
||||
COPY create.sql /
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
333
docker/test/stateful/create.sql
Normal file
333
docker/test/stateful/create.sql
Normal file
@ -0,0 +1,333 @@
|
||||
ATTACH TABLE datasets.hits_v1 UUID '78ebf6a1-d987-4579-b3ec-00c1a087b1f3'
|
||||
(
|
||||
WatchID UInt64,
|
||||
JavaEnable UInt8,
|
||||
Title String,
|
||||
GoodEvent Int16,
|
||||
EventTime DateTime,
|
||||
EventDate Date,
|
||||
CounterID UInt32,
|
||||
ClientIP UInt32,
|
||||
ClientIP6 FixedString(16),
|
||||
RegionID UInt32,
|
||||
UserID UInt64,
|
||||
CounterClass Int8,
|
||||
OS UInt8,
|
||||
UserAgent UInt8,
|
||||
URL String,
|
||||
Referer String,
|
||||
URLDomain String,
|
||||
RefererDomain String,
|
||||
Refresh UInt8,
|
||||
IsRobot UInt8,
|
||||
RefererCategories Array(UInt16),
|
||||
URLCategories Array(UInt16),
|
||||
URLRegions Array(UInt32),
|
||||
RefererRegions Array(UInt32),
|
||||
ResolutionWidth UInt16,
|
||||
ResolutionHeight UInt16,
|
||||
ResolutionDepth UInt8,
|
||||
FlashMajor UInt8,
|
||||
FlashMinor UInt8,
|
||||
FlashMinor2 String,
|
||||
NetMajor UInt8,
|
||||
NetMinor UInt8,
|
||||
UserAgentMajor UInt16,
|
||||
UserAgentMinor FixedString(2),
|
||||
CookieEnable UInt8,
|
||||
JavascriptEnable UInt8,
|
||||
IsMobile UInt8,
|
||||
MobilePhone UInt8,
|
||||
MobilePhoneModel String,
|
||||
Params String,
|
||||
IPNetworkID UInt32,
|
||||
TraficSourceID Int8,
|
||||
SearchEngineID UInt16,
|
||||
SearchPhrase String,
|
||||
AdvEngineID UInt8,
|
||||
IsArtifical UInt8,
|
||||
WindowClientWidth UInt16,
|
||||
WindowClientHeight UInt16,
|
||||
ClientTimeZone Int16,
|
||||
ClientEventTime DateTime,
|
||||
SilverlightVersion1 UInt8,
|
||||
SilverlightVersion2 UInt8,
|
||||
SilverlightVersion3 UInt32,
|
||||
SilverlightVersion4 UInt16,
|
||||
PageCharset String,
|
||||
CodeVersion UInt32,
|
||||
IsLink UInt8,
|
||||
IsDownload UInt8,
|
||||
IsNotBounce UInt8,
|
||||
FUniqID UInt64,
|
||||
HID UInt32,
|
||||
IsOldCounter UInt8,
|
||||
IsEvent UInt8,
|
||||
IsParameter UInt8,
|
||||
DontCountHits UInt8,
|
||||
WithHash UInt8,
|
||||
HitColor FixedString(1),
|
||||
UTCEventTime DateTime,
|
||||
Age UInt8,
|
||||
Sex UInt8,
|
||||
Income UInt8,
|
||||
Interests UInt16,
|
||||
Robotness UInt8,
|
||||
GeneralInterests Array(UInt16),
|
||||
RemoteIP UInt32,
|
||||
RemoteIP6 FixedString(16),
|
||||
WindowName Int32,
|
||||
OpenerName Int32,
|
||||
HistoryLength Int16,
|
||||
BrowserLanguage FixedString(2),
|
||||
BrowserCountry FixedString(2),
|
||||
SocialNetwork String,
|
||||
SocialAction String,
|
||||
HTTPError UInt16,
|
||||
SendTiming Int32,
|
||||
DNSTiming Int32,
|
||||
ConnectTiming Int32,
|
||||
ResponseStartTiming Int32,
|
||||
ResponseEndTiming Int32,
|
||||
FetchTiming Int32,
|
||||
RedirectTiming Int32,
|
||||
DOMInteractiveTiming Int32,
|
||||
DOMContentLoadedTiming Int32,
|
||||
DOMCompleteTiming Int32,
|
||||
LoadEventStartTiming Int32,
|
||||
LoadEventEndTiming Int32,
|
||||
NSToDOMContentLoadedTiming Int32,
|
||||
FirstPaintTiming Int32,
|
||||
RedirectCount Int8,
|
||||
SocialSourceNetworkID UInt8,
|
||||
SocialSourcePage String,
|
||||
ParamPrice Int64,
|
||||
ParamOrderID String,
|
||||
ParamCurrency FixedString(3),
|
||||
ParamCurrencyID UInt16,
|
||||
GoalsReached Array(UInt32),
|
||||
OpenstatServiceName String,
|
||||
OpenstatCampaignID String,
|
||||
OpenstatAdID String,
|
||||
OpenstatSourceID String,
|
||||
UTMSource String,
|
||||
UTMMedium String,
|
||||
UTMCampaign String,
|
||||
UTMContent String,
|
||||
UTMTerm String,
|
||||
FromTag String,
|
||||
HasGCLID UInt8,
|
||||
RefererHash UInt64,
|
||||
URLHash UInt64,
|
||||
CLID UInt32,
|
||||
YCLID UInt64,
|
||||
ShareService String,
|
||||
ShareURL String,
|
||||
ShareTitle String,
|
||||
"ParsedParams.Key1" Array(String),
|
||||
"ParsedParams.Key2" Array(String),
|
||||
"ParsedParams.Key3" Array(String),
|
||||
"ParsedParams.Key4" Array(String),
|
||||
"ParsedParams.Key5" Array(String),
|
||||
"ParsedParams.ValueDouble" Array(Float64),
|
||||
IslandID FixedString(16),
|
||||
RequestNum UInt32,
|
||||
RequestTry UInt8
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID))
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
|
||||
disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));
|
||||
|
||||
ATTACH TABLE datasets.visits_v1 UUID '5131f834-711f-4168-98a5-968b691a104b'
|
||||
(
|
||||
CounterID UInt32,
|
||||
StartDate Date,
|
||||
Sign Int8,
|
||||
IsNew UInt8,
|
||||
VisitID UInt64,
|
||||
UserID UInt64,
|
||||
StartTime DateTime,
|
||||
Duration UInt32,
|
||||
UTCStartTime DateTime,
|
||||
PageViews Int32,
|
||||
Hits Int32,
|
||||
IsBounce UInt8,
|
||||
Referer String,
|
||||
StartURL String,
|
||||
RefererDomain String,
|
||||
StartURLDomain String,
|
||||
EndURL String,
|
||||
LinkURL String,
|
||||
IsDownload UInt8,
|
||||
TraficSourceID Int8,
|
||||
SearchEngineID UInt16,
|
||||
SearchPhrase String,
|
||||
AdvEngineID UInt8,
|
||||
PlaceID Int32,
|
||||
RefererCategories Array(UInt16),
|
||||
URLCategories Array(UInt16),
|
||||
URLRegions Array(UInt32),
|
||||
RefererRegions Array(UInt32),
|
||||
IsYandex UInt8,
|
||||
GoalReachesDepth Int32,
|
||||
GoalReachesURL Int32,
|
||||
GoalReachesAny Int32,
|
||||
SocialSourceNetworkID UInt8,
|
||||
SocialSourcePage String,
|
||||
MobilePhoneModel String,
|
||||
ClientEventTime DateTime,
|
||||
RegionID UInt32,
|
||||
ClientIP UInt32,
|
||||
ClientIP6 FixedString(16),
|
||||
RemoteIP UInt32,
|
||||
RemoteIP6 FixedString(16),
|
||||
IPNetworkID UInt32,
|
||||
SilverlightVersion3 UInt32,
|
||||
CodeVersion UInt32,
|
||||
ResolutionWidth UInt16,
|
||||
ResolutionHeight UInt16,
|
||||
UserAgentMajor UInt16,
|
||||
UserAgentMinor UInt16,
|
||||
WindowClientWidth UInt16,
|
||||
WindowClientHeight UInt16,
|
||||
SilverlightVersion2 UInt8,
|
||||
SilverlightVersion4 UInt16,
|
||||
FlashVersion3 UInt16,
|
||||
FlashVersion4 UInt16,
|
||||
ClientTimeZone Int16,
|
||||
OS UInt8,
|
||||
UserAgent UInt8,
|
||||
ResolutionDepth UInt8,
|
||||
FlashMajor UInt8,
|
||||
FlashMinor UInt8,
|
||||
NetMajor UInt8,
|
||||
NetMinor UInt8,
|
||||
MobilePhone UInt8,
|
||||
SilverlightVersion1 UInt8,
|
||||
Age UInt8,
|
||||
Sex UInt8,
|
||||
Income UInt8,
|
||||
JavaEnable UInt8,
|
||||
CookieEnable UInt8,
|
||||
JavascriptEnable UInt8,
|
||||
IsMobile UInt8,
|
||||
BrowserLanguage UInt16,
|
||||
BrowserCountry UInt16,
|
||||
Interests UInt16,
|
||||
Robotness UInt8,
|
||||
GeneralInterests Array(UInt16),
|
||||
Params Array(String),
|
||||
"Goals.ID" Array(UInt32),
|
||||
"Goals.Serial" Array(UInt32),
|
||||
"Goals.EventTime" Array(DateTime),
|
||||
"Goals.Price" Array(Int64),
|
||||
"Goals.OrderID" Array(String),
|
||||
"Goals.CurrencyID" Array(UInt32),
|
||||
WatchIDs Array(UInt64),
|
||||
ParamSumPrice Int64,
|
||||
ParamCurrency FixedString(3),
|
||||
ParamCurrencyID UInt16,
|
||||
ClickLogID UInt64,
|
||||
ClickEventID Int32,
|
||||
ClickGoodEvent Int32,
|
||||
ClickEventTime DateTime,
|
||||
ClickPriorityID Int32,
|
||||
ClickPhraseID Int32,
|
||||
ClickPageID Int32,
|
||||
ClickPlaceID Int32,
|
||||
ClickTypeID Int32,
|
||||
ClickResourceID Int32,
|
||||
ClickCost UInt32,
|
||||
ClickClientIP UInt32,
|
||||
ClickDomainID UInt32,
|
||||
ClickURL String,
|
||||
ClickAttempt UInt8,
|
||||
ClickOrderID UInt32,
|
||||
ClickBannerID UInt32,
|
||||
ClickMarketCategoryID UInt32,
|
||||
ClickMarketPP UInt32,
|
||||
ClickMarketCategoryName String,
|
||||
ClickMarketPPName String,
|
||||
ClickAWAPSCampaignName String,
|
||||
ClickPageName String,
|
||||
ClickTargetType UInt16,
|
||||
ClickTargetPhraseID UInt64,
|
||||
ClickContextType UInt8,
|
||||
ClickSelectType Int8,
|
||||
ClickOptions String,
|
||||
ClickGroupBannerID Int32,
|
||||
OpenstatServiceName String,
|
||||
OpenstatCampaignID String,
|
||||
OpenstatAdID String,
|
||||
OpenstatSourceID String,
|
||||
UTMSource String,
|
||||
UTMMedium String,
|
||||
UTMCampaign String,
|
||||
UTMContent String,
|
||||
UTMTerm String,
|
||||
FromTag String,
|
||||
HasGCLID UInt8,
|
||||
FirstVisit DateTime,
|
||||
PredLastVisit Date,
|
||||
LastVisit Date,
|
||||
TotalVisits UInt32,
|
||||
"TraficSource.ID" Array(Int8),
|
||||
"TraficSource.SearchEngineID" Array(UInt16),
|
||||
"TraficSource.AdvEngineID" Array(UInt8),
|
||||
"TraficSource.PlaceID" Array(UInt16),
|
||||
"TraficSource.SocialSourceNetworkID" Array(UInt8),
|
||||
"TraficSource.Domain" Array(String),
|
||||
"TraficSource.SearchPhrase" Array(String),
|
||||
"TraficSource.SocialSourcePage" Array(String),
|
||||
Attendance FixedString(16),
|
||||
CLID UInt32,
|
||||
YCLID UInt64,
|
||||
NormalizedRefererHash UInt64,
|
||||
SearchPhraseHash UInt64,
|
||||
RefererDomainHash UInt64,
|
||||
NormalizedStartURLHash UInt64,
|
||||
StartURLDomainHash UInt64,
|
||||
NormalizedEndURLHash UInt64,
|
||||
TopLevelDomain UInt64,
|
||||
URLScheme UInt64,
|
||||
OpenstatServiceNameHash UInt64,
|
||||
OpenstatCampaignIDHash UInt64,
|
||||
OpenstatAdIDHash UInt64,
|
||||
OpenstatSourceIDHash UInt64,
|
||||
UTMSourceHash UInt64,
|
||||
UTMMediumHash UInt64,
|
||||
UTMCampaignHash UInt64,
|
||||
UTMContentHash UInt64,
|
||||
UTMTermHash UInt64,
|
||||
FromHash UInt64,
|
||||
WebVisorEnabled UInt8,
|
||||
WebVisorActivity UInt32,
|
||||
"ParsedParams.Key1" Array(String),
|
||||
"ParsedParams.Key2" Array(String),
|
||||
"ParsedParams.Key3" Array(String),
|
||||
"ParsedParams.Key4" Array(String),
|
||||
"ParsedParams.Key5" Array(String),
|
||||
"ParsedParams.ValueDouble" Array(Float64),
|
||||
"Market.Type" Array(UInt8),
|
||||
"Market.GoalID" Array(UInt32),
|
||||
"Market.OrderID" Array(String),
|
||||
"Market.OrderPrice" Array(Int64),
|
||||
"Market.PP" Array(UInt32),
|
||||
"Market.DirectPlaceID" Array(UInt32),
|
||||
"Market.DirectOrderID" Array(UInt32),
|
||||
"Market.DirectBannerID" Array(UInt32),
|
||||
"Market.GoodID" Array(String),
|
||||
"Market.GoodName" Array(String),
|
||||
"Market.GoodQuantity" Array(Int32),
|
||||
"Market.GoodPrice" Array(Int64),
|
||||
IslandID FixedString(16)
|
||||
)
|
||||
ENGINE = CollapsingMergeTree(Sign)
|
||||
PARTITION BY toYYYYMM(StartDate)
|
||||
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID)
|
||||
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
|
||||
disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));
|
@ -97,21 +97,9 @@ start
|
||||
|
||||
setup_logs_replication
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
|
||||
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
|
||||
chmod 777 -R /var/lib/clickhouse
|
||||
clickhouse-client --query "SHOW DATABASES"
|
||||
|
||||
clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
|
||||
|
||||
service clickhouse-server restart
|
||||
|
||||
# Wait for server to start accepting connections
|
||||
for _ in {1..120}; do
|
||||
clickhouse-client --query "SELECT 1" && break
|
||||
sleep 1
|
||||
done
|
||||
|
||||
clickhouse-client --query "CREATE DATABASE datasets"
|
||||
clickhouse-client --multiquery < create.sql
|
||||
clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
|
@ -1,126 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import tarfile
|
||||
import logging
|
||||
import argparse
|
||||
import requests
|
||||
import tempfile
|
||||
|
||||
|
||||
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
|
||||
|
||||
AVAILABLE_DATASETS = {
|
||||
"hits": "hits_v1.tar",
|
||||
"visits": "visits_v1.tar",
|
||||
}
|
||||
|
||||
RETRIES_COUNT = 5
|
||||
|
||||
|
||||
def _get_temp_file_name():
|
||||
return os.path.join(
|
||||
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
|
||||
)
|
||||
|
||||
|
||||
def build_url(base_url, dataset):
|
||||
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
|
||||
|
||||
|
||||
def download_with_progress(url, path):
|
||||
logging.info("Downloading from %s to temp path %s", url, path)
|
||||
for i in range(RETRIES_COUNT):
|
||||
try:
|
||||
with open(path, "wb") as f:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
total_length = response.headers.get("content-length")
|
||||
if total_length is None or int(total_length) == 0:
|
||||
logging.info(
|
||||
"No content-length, will download file without progress"
|
||||
)
|
||||
f.write(response.content)
|
||||
else:
|
||||
dl = 0
|
||||
total_length = int(total_length)
|
||||
logging.info("Content length is %ld bytes", total_length)
|
||||
for data in response.iter_content(chunk_size=4096):
|
||||
dl += len(data)
|
||||
f.write(data)
|
||||
if sys.stdout.isatty():
|
||||
done = int(50 * dl / total_length)
|
||||
percent = int(100 * float(dl) / total_length)
|
||||
sys.stdout.write(
|
||||
"\r[{}{}] {}%".format(
|
||||
"=" * done, " " * (50 - done), percent
|
||||
)
|
||||
)
|
||||
sys.stdout.flush()
|
||||
break
|
||||
except Exception as ex:
|
||||
sys.stdout.write("\n")
|
||||
time.sleep(3)
|
||||
logging.info("Exception while downloading %s, retry %s", ex, i + 1)
|
||||
if os.path.exists(path):
|
||||
os.remove(path)
|
||||
else:
|
||||
raise Exception(
|
||||
"Cannot download dataset from {}, all retries exceeded".format(url)
|
||||
)
|
||||
|
||||
sys.stdout.write("\n")
|
||||
logging.info("Downloading finished")
|
||||
|
||||
|
||||
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
|
||||
logging.info(
|
||||
"Will unpack data from temp path %s to clickhouse db %s",
|
||||
tar_path,
|
||||
clickhouse_path,
|
||||
)
|
||||
with tarfile.open(tar_path, "r") as comp_file:
|
||||
comp_file.extractall(path=clickhouse_path)
|
||||
logging.info("Unpack finished")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Simple tool for dowloading datasets for clickhouse from S3"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--dataset-names",
|
||||
required=True,
|
||||
nargs="+",
|
||||
choices=list(AVAILABLE_DATASETS.keys()),
|
||||
)
|
||||
parser.add_argument("--url-prefix", default=DEFAULT_URL)
|
||||
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
|
||||
|
||||
args = parser.parse_args()
|
||||
datasets = args.dataset_names
|
||||
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
|
||||
for dataset in datasets:
|
||||
logging.info("Processing %s", dataset)
|
||||
temp_archive_path = _get_temp_file_name()
|
||||
try:
|
||||
download_url_for_dataset = build_url(args.url_prefix, dataset)
|
||||
download_with_progress(download_url_for_dataset, temp_archive_path)
|
||||
unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
|
||||
except Exception as ex:
|
||||
logging.info("Some exception occured %s", str(ex))
|
||||
raise
|
||||
finally:
|
||||
logging.info(
|
||||
"Will remove downloaded file %s from filesystem if it exists",
|
||||
temp_archive_path,
|
||||
)
|
||||
if os.path.exists(temp_archive_path):
|
||||
os.remove(temp_archive_path)
|
||||
logging.info("Processing of %s finished", dataset)
|
||||
logging.info("Fetch finished, enjoy your tables!")
|
@ -46,7 +46,7 @@ RUN apt-get update -y \
|
||||
p7zip-full \
|
||||
&& apt-get clean
|
||||
|
||||
RUN pip3 install numpy scipy pandas Jinja2
|
||||
RUN pip3 install numpy scipy pandas Jinja2 pyarrow
|
||||
|
||||
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
|
||||
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \
|
||||
|
@ -99,6 +99,16 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
|
||||
> /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
|
||||
mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
|
||||
|
||||
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
|
||||
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" \
|
||||
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
|
||||
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
|
||||
|
||||
sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \
|
||||
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" \
|
||||
> /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
|
||||
mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
|
||||
|
||||
mkdir -p /var/run/clickhouse-server1
|
||||
sudo chown clickhouse:clickhouse /var/run/clickhouse-server1
|
||||
sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \
|
||||
@ -235,6 +245,20 @@ clickhouse-client -q "system flush logs" ||:
|
||||
# stop logs replication to make it possible to dump logs tables via clickhouse-local
|
||||
stop_logs_replication
|
||||
|
||||
# Try to get logs while server is running
|
||||
successfuly_saved=0
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
|
||||
do
|
||||
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst
|
||||
successfuly_saved=$?
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst
|
||||
successfuly_saved=$((successfuly_saved | $?))
|
||||
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst
|
||||
successfuly_saved=$((successfuly_saved | $?))
|
||||
fi
|
||||
done
|
||||
|
||||
# Stop server so we can safely read data with clickhouse-local.
|
||||
# Why do we read data with clickhouse-local?
|
||||
# Because it's the simplest way to read it when server has crashed.
|
||||
@ -254,21 +278,25 @@ if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TR
|
||||
data_path_config="--config-file=/etc/clickhouse-server/config.xml"
|
||||
fi
|
||||
|
||||
# Compress tables.
|
||||
#
|
||||
# NOTE:
|
||||
# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data
|
||||
# directly
|
||||
# - even though ci auto-compress some files (but not *.tsv) it does this only
|
||||
# for files >64MB, we want this files to be compressed explicitly
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log
|
||||
do
|
||||
clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
|
||||
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
|
||||
fi
|
||||
done
|
||||
|
||||
# If server crashed dump system logs with clickhouse-local
|
||||
if [ $successfuly_saved -ne 0 ]; then
|
||||
# Compress tables.
|
||||
#
|
||||
# NOTE:
|
||||
# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data
|
||||
# directly
|
||||
# - even though ci auto-compress some files (but not *.tsv) it does this only
|
||||
# for files >64MB, we want this files to be compressed explicitly
|
||||
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
|
||||
do
|
||||
clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||:
|
||||
clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||:
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Also export trace log in flamegraph-friendly format.
|
||||
for trace_type in CPU Memory Real
|
||||
|
@ -23,8 +23,6 @@ RUN apt-get update -y \
|
||||
|
||||
COPY run.sh /
|
||||
|
||||
ENV DATASETS="hits visits"
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
@ -59,12 +59,11 @@ start
|
||||
|
||||
setup_logs_replication
|
||||
|
||||
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
|
||||
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
|
||||
chmod 777 -R /var/lib/clickhouse
|
||||
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
|
||||
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
|
||||
clickhouse-client --query "CREATE DATABASE datasets"
|
||||
clickhouse-client --multiquery < create.sql
|
||||
clickhouse-client --query "SHOW TABLES FROM datasets"
|
||||
|
||||
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
|
||||
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
|
||||
@ -193,7 +192,7 @@ stop
|
||||
|
||||
# Let's enable S3 storage by default
|
||||
export USE_S3_STORAGE_FOR_MERGE_TREE=1
|
||||
export $RANDOMIZE_OBJECT_KEY_TYPE=1
|
||||
export RANDOMIZE_OBJECT_KEY_TYPE=1
|
||||
export ZOOKEEPER_FAULT_INJECTION=1
|
||||
configure
|
||||
|
||||
|
@ -56,6 +56,9 @@ echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/sys
|
||||
# Install previous release packages
|
||||
install_packages previous_release_package_folder
|
||||
|
||||
# Save old settings from system table for settings changes check
|
||||
clickhouse-local -q "select * from system.settings format Native" > old_settings.native
|
||||
|
||||
# Initial run without S3 to create system.*_log on local file system to make it
|
||||
# available for dump via clickhouse-local
|
||||
configure
|
||||
@ -78,6 +81,7 @@ remove_keeper_config "create_if_not_exists" "[01]"
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
|
||||
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
|
||||
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
|
||||
@ -117,6 +121,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
|
||||
rm /etc/clickhouse-server/config.d/merge_tree.xml
|
||||
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
|
||||
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
|
||||
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
|
||||
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
|
||||
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
|
||||
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
|
||||
@ -150,6 +155,63 @@ install_packages package_folder
|
||||
export ZOOKEEPER_FAULT_INJECTION=1
|
||||
configure
|
||||
|
||||
# Check that all new/changed setting were added in settings changes history.
|
||||
# Some settings can be different for builds with sanitizers, so we check
|
||||
# settings changes only for non-sanitizer builds.
|
||||
IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'")
|
||||
if [ "${IS_SANITIZED}" -eq "0" ]
|
||||
then
|
||||
clickhouse-local -q "select * from system.settings format Native" > new_settings.native
|
||||
clickhouse-local -nmq "
|
||||
CREATE TABLE old_settings AS file('old_settings.native');
|
||||
CREATE TABLE new_settings AS file('new_settings.native');
|
||||
|
||||
SELECT
|
||||
name,
|
||||
new_settings.value AS new_value,
|
||||
old_settings.value AS old_value
|
||||
FROM new_settings
|
||||
LEFT JOIN old_settings ON new_settings.name = old_settings.name
|
||||
WHERE (new_settings.value != old_settings.value) AND (name NOT IN (
|
||||
SELECT arrayJoin(tupleElement(changes, 'name'))
|
||||
FROM system.settings_changes
|
||||
WHERE version = extract(version(), '^(?:\\d+\\.\\d+)')
|
||||
))
|
||||
SETTINGS join_use_nulls = 1
|
||||
INTO OUTFILE 'changed_settings.txt'
|
||||
FORMAT PrettyCompactNoEscapes;
|
||||
|
||||
SELECT name
|
||||
FROM new_settings
|
||||
WHERE (name NOT IN (
|
||||
SELECT name
|
||||
FROM old_settings
|
||||
)) AND (name NOT IN (
|
||||
SELECT arrayJoin(tupleElement(changes, 'name'))
|
||||
FROM system.settings_changes
|
||||
WHERE version = extract(version(), '^(?:\\d+\\.\\d+)')
|
||||
))
|
||||
INTO OUTFILE 'new_settings.txt'
|
||||
FORMAT PrettyCompactNoEscapes;
|
||||
"
|
||||
|
||||
if [ -s changed_settings.txt ]
|
||||
then
|
||||
mv changed_settings.txt /test_output/
|
||||
echo -e "Changed settings are not reflected in settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "There are no changed settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv
|
||||
fi
|
||||
|
||||
if [ -s new_settings.txt ]
|
||||
then
|
||||
mv new_settings.txt /test_output/
|
||||
echo -e "New settings are not reflected in settings changes history (see new_settings.txt)$FAIL$(head_escaped /test_output/new_settings.txt)" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "There are no new settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv
|
||||
fi
|
||||
fi
|
||||
|
||||
# Just in case previous version left some garbage in zk
|
||||
sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
|
||||
| sed "s|>1<|>0<|g" \
|
||||
@ -255,6 +317,8 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
|
||||
(test like '%Fatal message%') DESC,
|
||||
(test like '%Error message%') DESC,
|
||||
(test like '%previous release%') DESC,
|
||||
(test like '%Changed settings%') DESC,
|
||||
(test like '%New settings%') DESC,
|
||||
rowNumberInAllBlocks()
|
||||
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
|
||||
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
|
||||
|
@ -11,6 +11,7 @@ sidebar_label: 2023
|
||||
* Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* The experimental parts metadata cache is removed from the codebase. [#54215](https://github.com/ClickHouse/ClickHouse/pull/54215) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Disable setting `input_format_json_try_infer_numbers_from_strings` by default, so we don't try to infer numbers from strings in JSON formats by default to avoid possible parsing errors when sample data contains strings that looks like a number. [#55099](https://github.com/ClickHouse/ClickHouse/pull/55099) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* IPv6 bloom filter indexes created prior to March 2023 are not compatible with current version and have to be rebuilt. [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
|
||||
#### New Feature
|
||||
* Added new type of authentication based on SSH keys. It works only for Native TCP protocol. [#41109](https://github.com/ClickHouse/ClickHouse/pull/41109) ([George Gamezardashvili](https://github.com/InfJoker)).
|
||||
|
@ -16,7 +16,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
...
|
||||
) ENGINE = MySQL('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
|
||||
) ENGINE = MySQL({host:port, database, table, user, password[, replace_query, on_duplicate_clause] | named_collection[, option=value [,..]]})
|
||||
SETTINGS
|
||||
[ connection_pool_size=16, ]
|
||||
[ connection_max_tries=3, ]
|
||||
@ -42,23 +42,17 @@ The MySQL Table Engine is currently not available on the ClickHouse builds for M
|
||||
**Engine Parameters**
|
||||
|
||||
- `host:port` — MySQL server address.
|
||||
|
||||
- `database` — Remote database name.
|
||||
|
||||
- `table` — Remote table name.
|
||||
|
||||
- `user` — MySQL user.
|
||||
|
||||
- `password` — User password.
|
||||
|
||||
- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted.
|
||||
|
||||
- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query.
|
||||
|
||||
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html) to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.
|
||||
|
||||
To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception.
|
||||
|
||||
Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment.
|
||||
|
||||
Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL server.
|
||||
|
||||
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
|
||||
@ -71,7 +65,7 @@ CREATE TABLE test_replicas (id UInt32, name String, age UInt32, money UInt32) EN
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
||||
Table in MySQL:
|
||||
Create table in MySQL:
|
||||
|
||||
``` text
|
||||
mysql> CREATE TABLE `test`.`test` (
|
||||
@ -94,7 +88,7 @@ mysql> select * from test;
|
||||
1 row in set (0,00 sec)
|
||||
```
|
||||
|
||||
Table in ClickHouse, retrieving data from the MySQL table created above:
|
||||
Create table in ClickHouse using plain arguments:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE mysql_table
|
||||
@ -105,6 +99,25 @@ CREATE TABLE mysql_table
|
||||
ENGINE = MySQL('localhost:3306', 'test', 'test', 'bayonet', '123')
|
||||
```
|
||||
|
||||
Or using [named collections](/docs/en/operations/named-collections.md):
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION creds AS
|
||||
host = 'localhost',
|
||||
port = 3306,
|
||||
database = 'test',
|
||||
user = 'bayonet',
|
||||
password = '123';
|
||||
CREATE TABLE mysql_table
|
||||
(
|
||||
`float_nullable` Nullable(Float32),
|
||||
`int_id` Int32
|
||||
)
|
||||
ENGINE = MySQL(creds, table='test')
|
||||
```
|
||||
|
||||
Retrieving data from MySQL table:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM mysql_table
|
||||
```
|
||||
|
@ -16,7 +16,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
name1 type1 [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 type2 [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
...
|
||||
) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
|
||||
) ENGINE = PostgreSQL({host:port, database, table, user, password[, schema, [, on_conflict]] | named_collection[, option=value [,..]]})
|
||||
```
|
||||
|
||||
See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query.
|
||||
@ -35,31 +35,25 @@ The table structure can differ from the original PostgreSQL table structure:
|
||||
- `user` — PostgreSQL user.
|
||||
- `password` — User password.
|
||||
- `schema` — Non-default table schema. Optional.
|
||||
- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient.
|
||||
- `on_conflict` — Conflict resolution strategy. Example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient.
|
||||
|
||||
or via config (since version 21.11):
|
||||
[Named collections](/docs/en/operations/named-collections.md) (available since version 21.11) are recommended for production environment. Here is an example:
|
||||
|
||||
```
|
||||
<named_collections>
|
||||
<postgres1>
|
||||
<host></host>
|
||||
<port></port>
|
||||
<user></user>
|
||||
<password></password>
|
||||
<table></table>
|
||||
</postgres1>
|
||||
<postgres2>
|
||||
<host></host>
|
||||
<port></port>
|
||||
<user></user>
|
||||
<password></password>
|
||||
</postgres2>
|
||||
<postgres_creds>
|
||||
<host>localhost</host>
|
||||
<port>5432</port>
|
||||
<user>postgres</user>
|
||||
<password>****</password>
|
||||
<schema>schema1</schema>
|
||||
</postgres_creds>
|
||||
</named_collections>
|
||||
```
|
||||
|
||||
Some parameters can be overridden by key value arguments:
|
||||
``` sql
|
||||
SELECT * FROM postgresql(postgres1, schema='schema1', table='table1');
|
||||
SELECT * FROM postgresql(postgres_creds, table='table1');
|
||||
```
|
||||
|
||||
## Implementation Details {#implementation-details}
|
||||
|
@ -16,30 +16,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
|
||||
name1 [type1],
|
||||
name2 [type2],
|
||||
...
|
||||
) ENGINE = Redis(host:port[, db_index[, password[, pool_size]]]) PRIMARY KEY(primary_key_name);
|
||||
) ENGINE = Redis({host:port[, db_index[, password[, pool_size]]] | named_collection[, option=value [,..]] })
|
||||
PRIMARY KEY(primary_key_name);
|
||||
```
|
||||
|
||||
**Engine Parameters**
|
||||
|
||||
- `host:port` — Redis server address, you can ignore port and default Redis port 6379 will be used.
|
||||
|
||||
- `db_index` — Redis db index range from 0 to 15, default is 0.
|
||||
|
||||
- `password` — User password, default is blank string.
|
||||
|
||||
- `pool_size` — Redis max connection pool size, default is 16.
|
||||
|
||||
- `primary_key_name` - any column name in the column list.
|
||||
|
||||
- `primary` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a Redis key.
|
||||
:::note Serialization
|
||||
`PRIMARY KEY` supports only one column. The primary key will be serialized in binary as a Redis key.
|
||||
Columns other than the primary key will be serialized in binary as Redis value in corresponding order.
|
||||
:::
|
||||
|
||||
- columns other than the primary key will be serialized in binary as Redis value in corresponding order.
|
||||
Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment. At this moment, all parameters passed using named collections to redis are required.
|
||||
|
||||
- queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation.
|
||||
:::note Filtering
|
||||
Queries with `key equals` or `in filtering` will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation.
|
||||
:::
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
||||
Create a table in ClickHouse which allows to read data from Redis:
|
||||
Create a table in ClickHouse using `Redis` engine with plain arguments:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE redis_table
|
||||
@ -52,6 +54,31 @@ CREATE TABLE redis_table
|
||||
ENGINE = Redis('redis1:6379') PRIMARY KEY(key);
|
||||
```
|
||||
|
||||
Or using [named collections](/docs/en/operations/named-collections.md):
|
||||
|
||||
```
|
||||
<named_collections>
|
||||
<redis_creds>
|
||||
<host>localhost</host>
|
||||
<port>6379</port>
|
||||
<password>****</password>
|
||||
<pool_size>16</pool_size>
|
||||
<db_index>s0</db_index>
|
||||
</redis_creds>
|
||||
</named_collections>
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE redis_table
|
||||
(
|
||||
`key` String,
|
||||
`v1` UInt32,
|
||||
`v2` String,
|
||||
`v3` Float32
|
||||
)
|
||||
ENGINE = Redis(redis_creds) PRIMARY KEY(key);
|
||||
```
|
||||
|
||||
Insert:
|
||||
|
||||
```sql
|
||||
|
@ -39,8 +39,8 @@ If you need to update rows frequently, we recommend using the [`ReplacingMergeTr
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY],
|
||||
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY],
|
||||
name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTIC(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTIC(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
|
||||
...
|
||||
INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
|
||||
INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
|
||||
@ -56,7 +56,7 @@ ORDER BY expr
|
||||
[DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
|
||||
[WHERE conditions]
|
||||
[GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
|
||||
[SETTINGS name=value, ...]
|
||||
[SETTINGS name = value, ...]
|
||||
```
|
||||
|
||||
For a description of parameters, see the [CREATE query description](/docs/en/sql-reference/statements/create/table.md).
|
||||
@ -508,7 +508,7 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
|
||||
| [notEquals(!=, <>)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
|
||||
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
|
||||
| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | ✗ |
|
||||
| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ |
|
||||
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
|
||||
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ |
|
||||
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ |
|
||||
@ -620,7 +620,7 @@ The `TTL` clause can’t be used for key columns.
|
||||
#### Creating a table with `TTL`:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE example_table
|
||||
CREATE TABLE tab
|
||||
(
|
||||
d DateTime,
|
||||
a Int TTL d + INTERVAL 1 MONTH,
|
||||
@ -635,7 +635,7 @@ ORDER BY d;
|
||||
#### Adding TTL to a column of an existing table
|
||||
|
||||
``` sql
|
||||
ALTER TABLE example_table
|
||||
ALTER TABLE tab
|
||||
MODIFY COLUMN
|
||||
c String TTL d + INTERVAL 1 DAY;
|
||||
```
|
||||
@ -643,7 +643,7 @@ ALTER TABLE example_table
|
||||
#### Altering TTL of the column
|
||||
|
||||
``` sql
|
||||
ALTER TABLE example_table
|
||||
ALTER TABLE tab
|
||||
MODIFY COLUMN
|
||||
c String TTL d + INTERVAL 1 MONTH;
|
||||
```
|
||||
@ -681,7 +681,7 @@ If a column is not part of the `GROUP BY` expression and is not set explicitly i
|
||||
#### Creating a table with `TTL`:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE example_table
|
||||
CREATE TABLE tab
|
||||
(
|
||||
d DateTime,
|
||||
a Int
|
||||
@ -697,7 +697,7 @@ TTL d + INTERVAL 1 MONTH DELETE,
|
||||
#### Altering `TTL` of the table:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE example_table
|
||||
ALTER TABLE tab
|
||||
MODIFY TTL d + INTERVAL 1 DAY;
|
||||
```
|
||||
|
||||
@ -1366,7 +1366,7 @@ In this sample configuration:
|
||||
The statistic declaration is in the columns section of the `CREATE` query for tables from the `*MergeTree*` Family when we enable `set allow_experimental_statistic = 1`.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE example_table
|
||||
CREATE TABLE tab
|
||||
(
|
||||
a Int64 STATISTIC(tdigest),
|
||||
b Float64
|
||||
@ -1378,8 +1378,8 @@ ORDER BY a
|
||||
We can also manipulate statistics with `ALTER` statements.
|
||||
|
||||
```sql
|
||||
ALTER TABLE example_table ADD STATISTIC b TYPE tdigest;
|
||||
ALTER TABLE example_table DROP STATISTIC a TYPE tdigest;
|
||||
ALTER TABLE tab ADD STATISTIC b TYPE tdigest;
|
||||
ALTER TABLE tab DROP STATISTIC a TYPE tdigest;
|
||||
```
|
||||
|
||||
These lightweight statistics aggregate information about distribution of values in columns.
|
||||
@ -1390,3 +1390,42 @@ They can be used for query optimization when we enable `set allow_statistic_opti
|
||||
- `tdigest`
|
||||
|
||||
Stores distribution of values from numeric columns in [TDigest](https://github.com/tdunning/t-digest) sketch.
|
||||
|
||||
## Column-level Settings {#column-level-settings}
|
||||
|
||||
Certain MergeTree settings can be override at column level:
|
||||
|
||||
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
|
||||
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.
|
||||
|
||||
Example:
|
||||
|
||||
```sql
|
||||
CREATE TABLE tab
|
||||
(
|
||||
id Int64,
|
||||
document String SETTINGS (min_compress_block_size = 16777216, max_compress_block_size = 16777216)
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id
|
||||
```
|
||||
|
||||
Column-level settings can be modified or removed using [ALTER MODIFY COLUMN](/docs/en/sql-reference/statements/alter/column.md), for example:
|
||||
|
||||
- Remove `SETTINGS` from column declaration:
|
||||
|
||||
```sql
|
||||
ALTER TABLE tab MODIFY COLUMN document REMOVE SETTINGS;
|
||||
```
|
||||
|
||||
- Modify a setting:
|
||||
|
||||
```sql
|
||||
ALTER TABLE tab MODIFY COLUMN document MODIFY SETTING min_compress_block_size = 8192;
|
||||
```
|
||||
|
||||
- Reset one or more settings, also removes the setting declaration in the column expression of the table's CREATE query.
|
||||
|
||||
```sql
|
||||
ALTER TABLE tab MODIFY COLUMN document RESET SETTING min_compress_block_size;
|
||||
```
|
||||
|
@ -2356,6 +2356,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
|
||||
### Arrow format settings {#parquet-format-settings}
|
||||
|
||||
- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`.
|
||||
- [output_format_arrow_use_64_bit_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_64_bit_indexes_for_dictionary) - use 64-bit integer type for Dictionary indexes. Default value - `false`.
|
||||
- [output_format_arrow_use_signed_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_signed_indexes_for_dictionary) - use signed integer type for Dictionary indexes. Default value - `true`.
|
||||
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
|
||||
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
|
||||
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
|
||||
|
207
docs/en/operations/allocation-profiling.md
Normal file
207
docs/en/operations/allocation-profiling.md
Normal file
@ -0,0 +1,207 @@
|
||||
---
|
||||
slug: /en/operations/allocation-profiling
|
||||
sidebar_label: "Allocation profiling"
|
||||
title: "Allocation profiling"
|
||||
---
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# Allocation profiling
|
||||
|
||||
ClickHouse uses [jemalloc](https://github.com/jemalloc/jemalloc) as its global allocator that comes with some tools for allocation sampling and profiling.
|
||||
To make allocation profiling more convenient, `SYSTEM` commands are provided along 4LW commands in Keeper.
|
||||
|
||||
## Sampling allocations and flushing heap profiles
|
||||
|
||||
If we want to sample and profile allocations in `jemalloc`, we need to start ClickHouse/Keeper with profiling enabled using environment variable `MALLOC_CONF`.
|
||||
|
||||
```sh
|
||||
MALLOC_CONF=background_thread:true,prof:true
|
||||
```
|
||||
|
||||
`jemalloc` will sample allocation and store the information internally.
|
||||
|
||||
We can tell `jemalloc` to flush current profile by running:
|
||||
|
||||
<Tabs groupId="binary">
|
||||
<TabItem value="clickhouse" label="ClickHouse">
|
||||
|
||||
SYSTEM JEMALLOC FLUSH PROFILE
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="keeper" label="Keeper">
|
||||
|
||||
echo jmfp | nc localhost 9181
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
By default, heap profile file will be generated in `/tmp/jemalloc_clickhouse._pid_._seqnum_.heap` where `_pid_` is the PID of ClickHouse and `_seqnum_` is the global sequence number for the current heap profile.
|
||||
For Keeper, the default file is `/tmp/jemalloc_keeper._pid_._seqnum_.heap` following the same rules.
|
||||
|
||||
A different location can be defined by appending the `MALLOC_CONF` environment variable with `prof_prefix` option.
|
||||
For example, if we want to generate profiles in `/data` folder where the prefix for filename will be `my_current_profile` we can run ClickHouse/Keeper with following environment variable:
|
||||
```sh
|
||||
MALLOC_CONF=background_thread:true,prof:true,prof_prefix:/data/my_current_profile
|
||||
```
|
||||
Generated file will append to prefix PID and sequence number.
|
||||
|
||||
## Analyzing heap profiles
|
||||
|
||||
After we generated heap profiles, we need to analyze them.
|
||||
For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/jemalloc/jemalloc/blob/dev/bin/jeprof.in) which can be installed in multiple ways:
|
||||
- installing `jemalloc` using system's package manager
|
||||
- cloning [jemalloc repo](https://github.com/jemalloc/jemalloc) and running autogen.sh from the root folder that will provide you with `jeprof` script inside the `bin` folder
|
||||
|
||||
:::note
|
||||
`jeprof` uses `addr2line` to generate stacktraces which can be really slow.
|
||||
If that’s the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool.
|
||||
|
||||
```
|
||||
git clone https://github.com/gimli-rs/addr2line
|
||||
cd addr2line
|
||||
cargo b --examples -r
|
||||
cp ./target/release/examples/addr2line path/to/current/addr2line
|
||||
```
|
||||
:::
|
||||
|
||||
There are many different formats to generate from the heap profile using `jeprof`.
|
||||
We recommend to run `jeprof --help` to check usage and many different options the tool provides.
|
||||
|
||||
In general, `jeprof` command will look like this:
|
||||
|
||||
```sh
|
||||
jeprof path/to/binary path/to/heap/profile --output_format [ > output_file]
|
||||
```
|
||||
|
||||
If we want to compare which allocations happened between 2 profiles we can set the base argument:
|
||||
|
||||
```sh
|
||||
jeprof path/to/binary --base path/to/first/heap/profile path/to/second/heap/profile --output_format [ > output_file]
|
||||
```
|
||||
|
||||
For example:
|
||||
|
||||
- if we want to generate a text file with each procedure written per line:
|
||||
|
||||
```sh
|
||||
jeprof path/to/binary path/to/heap/profile --text > result.txt
|
||||
```
|
||||
|
||||
- if we want to generate a PDF file with call-graph:
|
||||
|
||||
```sh
|
||||
jeprof path/to/binary path/to/heap/profile --pdf > result.pdf
|
||||
```
|
||||
|
||||
### Generating flame graph
|
||||
|
||||
`jeprof` allows us to generate collapsed stacks for building flame graphs.
|
||||
|
||||
We need to use `--collapsed` argument:
|
||||
|
||||
```sh
|
||||
jeprof path/to/binary path/to/heap/profile --collapsed > result.collapsed
|
||||
```
|
||||
|
||||
After that, we can use many different tools to visualize collapsed stacks.
|
||||
|
||||
Most popular would be [FlameGraph](https://github.com/brendangregg/FlameGraph) which contains a script called `flamegraph.pl`:
|
||||
|
||||
```sh
|
||||
cat result.collapsed | /path/to/FlameGraph/flamegraph.pl --color=mem --title="Allocation Flame Graph" --width 2400 > result.svg
|
||||
```
|
||||
|
||||
Another interesting tool is [speedscope](https://www.speedscope.app/) that allows you to analyze collected stacks in a more interactive way.
|
||||
|
||||
## Controlling allocation profiler during runtime
|
||||
|
||||
If ClickHouse/Keeper were started with enabled profiler, they support additional commands for disabling/enabling allocation profiling during runtime.
|
||||
Using those commands, it's easier to profile only specific intervals.
|
||||
|
||||
Disable profiler:
|
||||
|
||||
<Tabs groupId="binary">
|
||||
<TabItem value="clickhouse" label="ClickHouse">
|
||||
|
||||
SYSTEM JEMALLOC DISABLE PROFILE
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="keeper" label="Keeper">
|
||||
|
||||
echo jmdp | nc localhost 9181
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
Enable profiler:
|
||||
|
||||
<Tabs groupId="binary">
|
||||
<TabItem value="clickhouse" label="ClickHouse">
|
||||
|
||||
SYSTEM JEMALLOC ENABLE PROFILE
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="keeper" label="Keeper">
|
||||
|
||||
echo jmep | nc localhost 9181
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
It's also possible to control the initial state of the profiler by setting `prof_active` option which is enabled by default.
|
||||
For example, if we don't want to sample allocations during startup but only after we enable the profiler, we can start ClickHouse/Keeper with following environment variable:
|
||||
```sh
|
||||
MALLOC_CONF=background_thread:true,prof:true,prof_active:false
|
||||
```
|
||||
|
||||
and enable profiler at a later point.
|
||||
|
||||
## Additional options for profiler
|
||||
|
||||
`jemalloc` has many different options available related to profiler which can be controlled by modifying `MALLOC_CONF` environment variable.
|
||||
For example, interval between allocation samples can be controlled with `lg_prof_sample`.
|
||||
If you want to dump heap profile every N bytes you can enable it using `lg_prof_interval`.
|
||||
|
||||
We recommend to check `jemalloc`s [reference page](https://jemalloc.net/jemalloc.3.html) for such options.
|
||||
|
||||
## Other resources
|
||||
|
||||
ClickHouse/Keeper expose `jemalloc` related metrics in many different ways.
|
||||
|
||||
:::warning Warning
|
||||
It's important to be aware that none of these metrics are synchronized with each other and values may drift.
|
||||
:::
|
||||
|
||||
### System table `asynchronous_metrics`
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM system.asynchronous_metrics
|
||||
WHERE metric ILIKE '%jemalloc%'
|
||||
FORMAT Vertical
|
||||
```
|
||||
|
||||
[Reference](/en/operations/system-tables/asynchronous_metrics)
|
||||
|
||||
### System table `jemalloc_bins`
|
||||
|
||||
Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas.
|
||||
|
||||
[Reference](/en/operations/system-tables/jemalloc_bins)
|
||||
|
||||
### Prometheus
|
||||
|
||||
All `jemalloc` related metrics from `asynchronous_metrics` are also exposed using Prometheus endpoint in both ClickHouse and Keeper.
|
||||
|
||||
[Reference](/en/operations/server-configuration-parameters/settings#prometheus)
|
||||
|
||||
### `jmst` 4LW command in Keeper
|
||||
|
||||
Keeper supports `jmst` 4LW command which returns [basic allocator statistics](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Basic-Allocator-Statistics).
|
||||
|
||||
Example:
|
||||
```sh
|
||||
echo jmst | nc localhost 9181
|
||||
```
|
@ -65,6 +65,20 @@ With Cluster Discovery, rather than defining each node explicitly, you simply sp
|
||||
<cluster_name>
|
||||
<discovery>
|
||||
<path>/clickhouse/discovery/cluster_name</path>
|
||||
|
||||
<!-- # Optional configuration parameters: -->
|
||||
|
||||
<!-- ## Authentication credentials to access all other nodes in cluster: -->
|
||||
<!-- <user>user1</user> -->
|
||||
<!-- <password>pass123</password> -->
|
||||
<!-- ### Alternatively to password, interserver secret may be used: -->
|
||||
<!-- <secret>secret123</secret> -->
|
||||
|
||||
<!-- ## Shard for current node (see below): -->
|
||||
<!-- <shard>1</shard> -->
|
||||
|
||||
<!-- ## Observer mode (see below): -->
|
||||
<!-- <observer/> -->
|
||||
</discovery>
|
||||
</cluster_name>
|
||||
</remote_servers>
|
||||
|
@ -5,9 +5,9 @@ sidebar_label: "Named collections"
|
||||
title: "Named collections"
|
||||
---
|
||||
|
||||
Named collections provide a way to store collections of key-value pairs to be
|
||||
Named collections provide a way to store collections of key-value pairs to be
|
||||
used to configure integrations with external sources. You can use named collections with
|
||||
dictionaries, tables, table functions, and object storage.
|
||||
dictionaries, tables, table functions, and object storage.
|
||||
|
||||
Named collections can be configured with DDL or in configuration files and are applied
|
||||
when ClickHouse starts. They simplify the creation of objects and the hiding of credentials
|
||||
@ -64,7 +64,7 @@ To manage named collections with DDL a user must have the `named_control_collect
|
||||
```
|
||||
|
||||
:::tip
|
||||
In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user.
|
||||
In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user.
|
||||
:::
|
||||
|
||||
## Storing named collections in configuration files
|
||||
@ -296,7 +296,6 @@ host = '127.0.0.1',
|
||||
port = 5432,
|
||||
database = 'test',
|
||||
schema = 'test_schema',
|
||||
connection_pool_size = 8
|
||||
```
|
||||
|
||||
Example of configuration:
|
||||
@ -310,7 +309,6 @@ Example of configuration:
|
||||
<port>5432</port>
|
||||
<database>test</database>
|
||||
<schema>test_schema</schema>
|
||||
<connection_pool_size>8</connection_pool_size>
|
||||
</mypg>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
@ -445,4 +443,3 @@ SELECT dictGet('dict', 'b', 1);
|
||||
│ a │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -29,10 +29,6 @@ Transactionally inconsistent caching is traditionally provided by client tools o
|
||||
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
|
||||
This reduces maintenance effort and avoids redundancy.
|
||||
|
||||
:::note
|
||||
Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
|
||||
:::
|
||||
|
||||
## Configuration Settings and Usage
|
||||
|
||||
Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the
|
||||
|
@ -2866,3 +2866,10 @@ This also allows a mix of resolver types can be used.
|
||||
### disable_tunneling_for_https_requests_over_http_proxy {#disable_tunneling_for_https_requests_over_http_proxy}
|
||||
|
||||
By default, tunneling (i.e, `HTTP CONNECT`) is used to make `HTTPS` requests over `HTTP` proxy. This setting can be used to disable it.
|
||||
|
||||
## max_materialized_views_count_for_table {#max_materialized_views_count_for_table}
|
||||
|
||||
A limit on the number of materialized views attached to a table.
|
||||
Note that only directly dependent views are considered here, and the creation of one view on top of another view is not considered.
|
||||
|
||||
Default value: `0`.
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_label: Settings Overview
|
||||
title: "Settings Overview"
|
||||
sidebar_position: 1
|
||||
slug: /en/operations/settings/
|
||||
pagination_next: en/operations/settings/settings
|
||||
@ -16,11 +16,34 @@ There are two main groups of ClickHouse settings:
|
||||
- Global server settings
|
||||
- Query-level settings
|
||||
|
||||
The main distinction between global server settings and query-level settings is that
|
||||
global server settings must be set in configuration files while query-level settings
|
||||
can be set in configuration files or with SQL queries.
|
||||
The main distinction between global server settings and query-level settings is that global server settings must be set in configuration files, while query-level settings can be set in configuration files or with SQL queries.
|
||||
|
||||
Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level.
|
||||
|
||||
Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query-level.
|
||||
Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query level.
|
||||
|
||||
## See non-default settings
|
||||
|
||||
To view which settings have been changed from their default value:
|
||||
|
||||
```sql
|
||||
SELECT name, value FROM system.settings WHERE changed
|
||||
```
|
||||
|
||||
If you haven't changed any settings from their default value, then ClickHouse will return nothing.
|
||||
|
||||
To check the value of a particular setting, specify the `name` of the setting in your query:
|
||||
|
||||
```sql
|
||||
SELECT name, value FROM system.settings WHERE name = 'max_threads'
|
||||
```
|
||||
|
||||
This command should return something like:
|
||||
|
||||
```response
|
||||
┌─name────────┬─value─────┐
|
||||
│ max_threads │ 'auto(8)' │
|
||||
└─────────────┴───────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.002 sec.
|
||||
```
|
||||
|
@ -172,7 +172,7 @@ If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use c
|
||||
|
||||
## timeout_overflow_mode {#timeout-overflow-mode}
|
||||
|
||||
What to do if the query is run longer than `max_execution_time`: `throw` or `break`. By default, `throw`.
|
||||
What to do if the query is run longer than `max_execution_time` or the estimated running time is longer than `max_estimated_execution_time`: `throw` or `break`. By default, `throw`.
|
||||
|
||||
# max_execution_time_leaf
|
||||
|
||||
@ -214,6 +214,10 @@ A maximum number of execution bytes per second. Checked on every data block when
|
||||
|
||||
Checks that execution speed is not too slow (no less than ‘min_execution_speed’), after the specified time in seconds has expired.
|
||||
|
||||
## max_estimated_execution_time {#max_estimated_execution_time}
|
||||
|
||||
Maximum query estimate execution time in seconds. Checked on every data block when ‘timeout_before_checking_execution_speed’ expires.
|
||||
|
||||
## max_columns_to_read {#max-columns-to-read}
|
||||
|
||||
A maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception.
|
||||
|
@ -1269,6 +1269,28 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### output_format_arrow_use_signed_indexes_for_dictionary {#output_format_arrow_use_signed_indexes_for_dictionary}
|
||||
|
||||
Use signed integer types instead of unsigned in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Unsigned integer types are used for indexes in `DICTIONARY` type.
|
||||
- 1 — Signed integer types are used for indexes in `DICTIONARY` type.
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
### output_format_arrow_use_64_bit_indexes_for_dictionary {#output_format_arrow_use_64_bit_indexes_for_dictionary}
|
||||
|
||||
Use 64-bit integer type in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — Type for indexes in `DICTIONARY` type is determined automatically.
|
||||
- 1 — 64-bit integer type is used for indexes in `DICTIONARY` type.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### output_format_arrow_string_as_string {#output_format_arrow_string_as_string}
|
||||
|
||||
Use Arrow String type instead of Binary for String columns.
|
||||
@ -1575,7 +1597,13 @@ Result:
|
||||
|
||||
Use ANSI escape sequences to paint colors in Pretty formats.
|
||||
|
||||
Enabled by default.
|
||||
possible values:
|
||||
|
||||
- `0` — Disabled. Pretty formats do not use ANSI escape sequences.
|
||||
- `1` — Enabled. Pretty formats will use ANSI escape sequences except for `NoEscapes` formats.
|
||||
- `auto` - Enabled if `stdout` is a terminal except for `NoEscapes` formats.
|
||||
|
||||
Default value is `auto`.
|
||||
|
||||
### output_format_pretty_grid_charset {#output_format_pretty_grid_charset}
|
||||
|
||||
|
@ -5197,3 +5197,13 @@ The value 0 means that you can delete all tables without any restrictions.
|
||||
:::note
|
||||
This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop)
|
||||
:::
|
||||
|
||||
## iceberg_engine_ignore_schema_evolution {#iceberg_engine_ignore_schema_evolution}
|
||||
|
||||
Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation.
|
||||
|
||||
:::note
|
||||
Enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema.
|
||||
:::
|
||||
|
||||
Default value: 'false'.
|
@ -10,7 +10,7 @@ Columns:
|
||||
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
|
||||
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
|
||||
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
|
||||
- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
|
||||
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
|
||||
|
||||
**Example**
|
||||
|
@ -49,7 +49,7 @@ Columns:
|
||||
|
||||
- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted.
|
||||
|
||||
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks.
|
||||
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times the action was postponed.
|
||||
|
||||
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed.
|
||||
|
||||
|
@ -88,6 +88,7 @@ ClickHouse-specific aggregate functions:
|
||||
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
|
||||
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
|
||||
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
|
||||
- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
|
||||
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
|
||||
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
|
||||
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
|
||||
|
@ -18,6 +18,7 @@ Functions:
|
||||
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
|
||||
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
|
||||
- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
|
||||
- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -0,0 +1,61 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch
|
||||
sidebar_position: 211
|
||||
title: quantileDDSketch
|
||||
---
|
||||
|
||||
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
quantileDDsketch[relative_accuracy, (level)](expr)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `relative_accuracy` — Relative accuracy of the quantile. Possible values are in the range from 0 to 1. [Float](../../../sql-reference/data-types/float.md). The size of the sketch depends on the range of the data and the relative accuracy. The larger the range and the smaller the relative accuracy, the larger the sketch. The rough memory size of the of the sketch is `log(max_value/min_value)/relative_accuracy`. The recommended value is 0.001 or higher.
|
||||
|
||||
- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Approximate quantile of the specified level.
|
||||
|
||||
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
|
||||
|
||||
**Example**
|
||||
|
||||
Input table has an integer and a float columns:
|
||||
|
||||
``` text
|
||||
┌─a─┬─────b─┐
|
||||
│ 1 │ 1.001 │
|
||||
│ 2 │ 1.002 │
|
||||
│ 3 │ 1.003 │
|
||||
│ 4 │ 1.004 │
|
||||
└───┴───────┘
|
||||
```
|
||||
|
||||
Query to calculate 0.75-quantile (third quartile):
|
||||
|
||||
``` sql
|
||||
SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐
|
||||
│ 2.974233423476717 │ 1.01 │
|
||||
└─────────────────────────────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
|
||||
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
|
@ -9,7 +9,7 @@ sidebar_position: 201
|
||||
|
||||
Syntax: `quantiles(level1, level2, …)(x)`
|
||||
|
||||
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
|
||||
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
|
||||
|
||||
## quantilesExactExclusive
|
||||
|
||||
|
@ -18,6 +18,12 @@ Supported range of values: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\].
|
||||
|
||||
Resolution: 1 second.
|
||||
|
||||
## Speed
|
||||
|
||||
The `Date` datatype is faster than `DateTime` under _most_ conditions.
|
||||
|
||||
The `Date` type requires 2 bytes of storage, while `DateTime` requires 4. However, when the database compresses the database, this difference is amplified. This amplification is due to the minutes and seconds in `DateTime` being less compressible. Filtering and aggregating `Date` instead of `DateTime` is also faster.
|
||||
|
||||
## Usage Remarks
|
||||
|
||||
The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how the values of the `DateTime` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01’).
|
||||
|
@ -35,8 +35,8 @@ Types are equivalent to types of C:
|
||||
|
||||
Aliases:
|
||||
|
||||
- `Float32` — `FLOAT`.
|
||||
- `Float64` — `DOUBLE`.
|
||||
- `Float32` — `FLOAT`, `REAL`, `SINGLE`.
|
||||
- `Float64` — `DOUBLE`, `DOUBLE PRECISION`.
|
||||
|
||||
When creating tables, numeric parameters for floating point numbers can be set (e.g. `FLOAT(12)`, `FLOAT(15, 22)`, `DOUBLE(12)`, `DOUBLE(4, 18)`), but ClickHouse ignores them.
|
||||
|
||||
|
@ -21,10 +21,10 @@ When creating tables, numeric parameters for integer numbers can be set (e.g. `T
|
||||
|
||||
Aliases:
|
||||
|
||||
- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`.
|
||||
- `Int16` — `SMALLINT`, `INT2`.
|
||||
- `Int32` — `INT`, `INT4`, `INTEGER`.
|
||||
- `Int64` — `BIGINT`.
|
||||
- `Int8` — `TINYINT`, `INT1`, `BYTE`, `TINYINT SIGNED`, `INT1 SIGNED`.
|
||||
- `Int16` — `SMALLINT`, `SMALLINT SIGNED`.
|
||||
- `Int32` — `INT`, `INTEGER`, `MEDIUMINT`, `MEDIUMINT SIGNED`, `INT SIGNED`, `INTEGER SIGNED`.
|
||||
- `Int64` — `BIGINT`, `SIGNED`, `BIGINT SIGNED`, `TIME`.
|
||||
|
||||
## UInt Ranges
|
||||
|
||||
@ -34,3 +34,11 @@ Aliases:
|
||||
- `UInt64` — \[0 : 18446744073709551615\]
|
||||
- `UInt128` — \[0 : 340282366920938463463374607431768211455\]
|
||||
- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
|
||||
|
||||
Aliases:
|
||||
|
||||
- `UInt8` — `TINYINT UNSIGNED`, `INT1 UNSIGNED`.
|
||||
- `UInt16` — `SMALLINT UNSIGNED`.
|
||||
- `UInt32` — `MEDIUMINT UNSIGNED`, `INT UNSIGNED`, `INTEGER UNSIGNED`
|
||||
- `UInt64` — `UNSIGNED`, `BIGINT UNSIGNED`, `BIT`, `SET`
|
||||
|
||||
|
@ -4,11 +4,11 @@ sidebar_position: 55
|
||||
sidebar_label: Nullable
|
||||
---
|
||||
|
||||
# Nullable(typename)
|
||||
# Nullable(T)
|
||||
|
||||
Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`.
|
||||
Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `T`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`.
|
||||
|
||||
For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`.
|
||||
`T` can’t be any of the composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md) but composite data types can contain `Nullable` type values, e.g. `Array(Nullable(Int8))`.
|
||||
|
||||
A `Nullable` type field can’t be included in table indexes.
|
||||
|
||||
|
@ -657,6 +657,43 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res;
|
||||
|
||||
Array elements set to `NULL` are handled as normal values.
|
||||
|
||||
## arrayShingles
|
||||
|
||||
Generates an array of "shingles", i.e. consecutive sub-arrays with specified length of the input array.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
arrayShingles(array, length)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `array` — Input array [Array](../../sql-reference/data-types/array.md).
|
||||
- `length` — The length of each shingle.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- An array of generated shingles.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT arrayShingles([1,2,3,4], 3) as res;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─res───────────────┐
|
||||
│ [[1,2,3],[2,3,4]] │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
## arraySort(\[func,\] arr, …) {#sort}
|
||||
|
||||
Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description.
|
||||
|
@ -23,10 +23,11 @@ The following actions are supported:
|
||||
- [RENAME COLUMN](#rename-column) — Renames an existing column.
|
||||
- [CLEAR COLUMN](#clear-column) — Resets column values.
|
||||
- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column.
|
||||
- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL.
|
||||
- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression, TTL, and column settings.
|
||||
- [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties.
|
||||
- [MODIFY COLUMN MODIFY SETTING](#modify-column-modify-setting) - Changes column settings.
|
||||
- [MODIFY COLUMN RESET SETTING](#modify-column-reset-setting) - Reset column settings.
|
||||
- [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing.
|
||||
|
||||
These actions are described in detail below.
|
||||
|
||||
## ADD COLUMN
|
||||
@ -75,7 +76,7 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified,
|
||||
|
||||
Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly.
|
||||
|
||||
:::tip
|
||||
:::tip
|
||||
You can’t delete a column if it is referenced by [materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized). Otherwise, it returns an error.
|
||||
:::
|
||||
|
||||
@ -208,7 +209,7 @@ The `ALTER` query for changing columns is replicated. The instructions are saved
|
||||
|
||||
## MODIFY COLUMN REMOVE
|
||||
|
||||
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
|
||||
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`, `SETTING`.
|
||||
|
||||
Syntax:
|
||||
|
||||
@ -228,6 +229,43 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
|
||||
|
||||
- [REMOVE TTL](ttl.md).
|
||||
|
||||
|
||||
## MODIFY COLUMN MODIFY SETTING
|
||||
|
||||
Modify a column setting.
|
||||
|
||||
Syntax:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING name=value,...;
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Modify column's `max_compress_block_size` to `1MB`:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name MODIFY COLUMN MODIFY SETTING max_compress_block_size = 1048576;
|
||||
```
|
||||
|
||||
## MODIFY COLUMN RESET SETTING
|
||||
|
||||
Reset a column setting, also removes the setting declaration in the column expression of the table's CREATE query.
|
||||
|
||||
Syntax:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name MODIFY COLUMN RESET SETTING name,...;
|
||||
```
|
||||
|
||||
**Example**
|
||||
|
||||
Remove column setting `max_compress_block_size` to `1MB`:
|
||||
|
||||
```sql
|
||||
ALTER TABLE table_name MODIFY COLUMN REMOVE SETTING max_compress_block_size;
|
||||
```
|
||||
|
||||
## MATERIALIZE COLUMN
|
||||
|
||||
Materializes or updates a column with an expression for a default value (`DEFAULT` or `MATERIALIZED`).
|
||||
|
@ -293,6 +293,8 @@ You can't combine both ways in one query.
|
||||
|
||||
Along with columns descriptions constraints could be defined:
|
||||
|
||||
### CONSTRAINT
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
@ -307,6 +309,30 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
|
||||
Adding large amount of constraints can negatively affect performance of big `INSERT` queries.
|
||||
|
||||
### ASSUME
|
||||
|
||||
The `ASSUME` clause is used to define a `CONSTRAINT` on a table that is assumed to be true. This constraint can then be used by the optimizer to enhance the performance of SQL queries.
|
||||
|
||||
Take this example where `ASSUME CONSTRAINT` is used in the creation of the `users_a` table:
|
||||
|
||||
```sql
|
||||
CREATE TABLE users_a (
|
||||
uid Int16,
|
||||
name String,
|
||||
age Int16,
|
||||
name_len UInt8 MATERIALIZED length(name),
|
||||
CONSTRAINT c1 ASSUME length(name) = name_len
|
||||
)
|
||||
ENGINE=MergeTree
|
||||
ORDER BY (name_len, name);
|
||||
```
|
||||
|
||||
Here, `ASSUME CONSTRAINT` is used to assert that the `length(name)` function always equals the value of the `name_len` column. This means that whenever `length(name)` is called in a query, ClickHouse can replace it with `name_len`, which should be faster because it avoids calling the `length()` function.
|
||||
|
||||
Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5;`, ClickHouse can optimize it to `SELECT name FROM users_a WHERE name_len < 5`; because of the `ASSUME CONSTRAINT`. This can make the query run faster because it avoids calculating the length of `name` for each row.
|
||||
|
||||
`ASSUME CONSTRAINT` **does not enforce the constraint**, it merely informs the optimizer that the constraint holds true. If the constraint is not actually true, the results of the queries may be incorrect. Therefore, you should only use `ASSUME CONSTRAINT` if you are sure that the constraint is true.
|
||||
|
||||
## TTL Expression
|
||||
|
||||
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
|
||||
|
@ -9,10 +9,6 @@ sidebar_label: RENAME
|
||||
Renames databases, tables, or dictionaries. Several entities can be renamed in a single query.
|
||||
Note that the `RENAME` query with several entities is non-atomic operation. To swap entities names atomically, use the [EXCHANGE](./exchange.md) statement.
|
||||
|
||||
:::note
|
||||
The `RENAME` query is supported by the [Atomic](../../engines/database-engines/atomic.md) database engine only.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
|
@ -343,13 +343,14 @@ SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_
|
||||
Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds.
|
||||
|
||||
``` sql
|
||||
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
|
||||
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
|
||||
```
|
||||
|
||||
After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits till the replica processes all of the fetched commands. The following modifiers are supported:
|
||||
|
||||
- If a `STRICT` modifier was specified then the query waits for the replication queue to become empty. The `STRICT` version may never succeed if new entries constantly appear in the replication queue.
|
||||
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
|
||||
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
|
||||
Additionally, the LIGHTWEIGHT modifier supports an optional FROM 'srcReplicas' clause, where 'srcReplicas' is a comma-separated list of source replica names. This extension allows for more targeted synchronization by focusing only on replication tasks originating from the specified source replicas.
|
||||
- If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed.
|
||||
|
||||
### SYNC DATABASE REPLICA
|
||||
|
@ -7,7 +7,7 @@ keywords: [udf, user defined function, clickhouse, executable, table, function]
|
||||
|
||||
# executable Table Function for UDFs
|
||||
|
||||
The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `users_scripts` directory and can read data from any source.
|
||||
The `executable` table function creates a table based on the output of a user-defined function (UDF) that you define in a script that outputs rows to **stdout**. The executable script is stored in the `users_scripts` directory and can read data from any source. Make sure your ClickHouse server has all the required packages to run the executable script. For example, if it is a Python script, ensure that the server has the necessary Python packages installed.
|
||||
|
||||
You can optionally include one or more input queries that stream their results to **stdin** for the script to read.
|
||||
|
||||
@ -63,7 +63,7 @@ if __name__ == "__main__":
|
||||
Let's invoke the script and have it generate 10 random strings:
|
||||
|
||||
```sql
|
||||
SELECT * FROM executable('my_script.py', TabSeparated, 'id UInt32, random String', (SELECT 10))
|
||||
SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', (SELECT 10))
|
||||
```
|
||||
|
||||
The response looks like:
|
||||
|
@ -9,7 +9,7 @@ sidebar_label: fuzzJSON
|
||||
Perturbs a JSON string with random variations.
|
||||
|
||||
``` sql
|
||||
fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
|
||||
fuzzJSON({ named_collection [, option=value [,..]] | json_str[, random_seed] })
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
@ -16,7 +16,8 @@ If you have multiple replicas in your cluster, you can use the [s3Cluster functi
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression])
|
||||
gcs(url [, NOSIGN | hmac_key, hmac_secret] [,format] [,structure] [,compression_method])
|
||||
gcs(named_collection[, option=value [,..]])
|
||||
```
|
||||
|
||||
:::tip GCS
|
||||
@ -24,10 +25,9 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML
|
||||
|
||||
:::
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
|
||||
**Parameters**
|
||||
|
||||
- `url` — Bucket path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
|
||||
:::note GCS
|
||||
The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
|
||||
```
|
||||
@ -35,10 +35,21 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML
|
||||
```
|
||||
and not ~~https://storage.cloud.google.com~~.
|
||||
:::
|
||||
- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed.
|
||||
- `hmac_key` and `hmac_secret` — Keys that specify credentials to use with given endpoint. Optional.
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension.
|
||||
|
||||
Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:
|
||||
|
||||
- `access_key_id` — `hmac_key`, optional.
|
||||
- `secret_access_key` — `hmac_secret`, optional.
|
||||
- `filename` — appended to the url if specified.
|
||||
- `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`.
|
||||
- `no_sign_request` — disabled by default.
|
||||
- `expiration_window_seconds` — default value is 120.
|
||||
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -61,7 +72,7 @@ LIMIT 2;
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
The similar but from file with `gzip` compression:
|
||||
The similar but from file with `gzip` compression method:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
@ -158,6 +169,16 @@ The below get data from all `test-data.csv.gz` files from any folder inside `my-
|
||||
SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
|
||||
```
|
||||
|
||||
For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). Here is the example:
|
||||
``` sql
|
||||
|
||||
CREATE NAMED COLLECTION creds AS
|
||||
access_key_id = '***',
|
||||
secret_access_key = '***';
|
||||
SELECT count(*)
|
||||
FROM gcs(creds, url='https://s3-object-url.csv')
|
||||
```
|
||||
|
||||
## Partitioned Write
|
||||
|
||||
If you specify `PARTITION BY` expression when inserting data into `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency.
|
||||
|
@ -11,31 +11,25 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause'])
|
||||
mysql({host:port, database, table, user, password[, replace_query, on_duplicate_clause] | named_collection[, option=value [,..]]})
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
**Parameters**
|
||||
|
||||
- `host:port` — MySQL server address.
|
||||
|
||||
- `database` — Remote database name.
|
||||
|
||||
- `table` — Remote table name.
|
||||
|
||||
- `user` — MySQL user.
|
||||
|
||||
- `password` — User password.
|
||||
|
||||
- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values:
|
||||
- `0` - The query is executed as `INSERT INTO`.
|
||||
- `1` - The query is executed as `REPLACE INTO`.
|
||||
|
||||
- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception).
|
||||
|
||||
Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1;`
|
||||
|
||||
`on_duplicate_clause` here is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.
|
||||
|
||||
Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment.
|
||||
|
||||
Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on the MySQL server.
|
||||
|
||||
The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes.
|
||||
@ -86,6 +80,18 @@ Selecting data from ClickHouse:
|
||||
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
|
||||
```
|
||||
|
||||
Or using [named collections](/docs/en/operations/named-collections.md):
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION creds AS
|
||||
host = 'localhost',
|
||||
port = 3306,
|
||||
database = 'test',
|
||||
user = 'bayonet',
|
||||
password = '123';
|
||||
SELECT * FROM mysql(creds, table='test');
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─int_id─┬─float─┐
|
||||
│ 1 │ 2 │
|
||||
|
@ -11,10 +11,10 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`])
|
||||
postgresql({host:port, database, table, user, password[, schema, [, on_conflict]] | named_collection[, option=value [,..]]})
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
**Parameters**
|
||||
|
||||
- `host:port` — PostgreSQL server address.
|
||||
- `database` — Remote database name.
|
||||
@ -22,6 +22,9 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`])
|
||||
- `user` — PostgreSQL user.
|
||||
- `password` — User password.
|
||||
- `schema` — Non-default table schema. Optional.
|
||||
- `on_conflict` — Conflict resolution strategy. Example: `ON CONFLICT DO NOTHING`. Optional.
|
||||
|
||||
Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md). In this case `host` and `port` should be specified separately. This approach is recommended for production environment.
|
||||
|
||||
**Returned Value**
|
||||
|
||||
@ -86,12 +89,24 @@ postgresql> SELECT * FROM test;
|
||||
(1 row)
|
||||
```
|
||||
|
||||
Selecting data from ClickHouse:
|
||||
Selecting data from ClickHouse using plain arguments:
|
||||
|
||||
```sql
|
||||
SELECT * FROM postgresql('localhost:5432', 'test', 'test', 'postgresql_user', 'password') WHERE str IN ('test');
|
||||
```
|
||||
|
||||
Or using [named collections](/docs/en/operations/named-collections.md):
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION mypg AS
|
||||
host = 'localhost',
|
||||
port = 5432,
|
||||
database = 'test',
|
||||
user = 'postgresql_user',
|
||||
password = 'password';
|
||||
SELECT * FROM postgresql(mypg, table='test') WHERE str IN ('test');
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─int_id─┬─int_nullable─┬─float─┬─str──┬─float_nullable─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │ 2 │ test │ ᴺᵁᴸᴸ │
|
||||
|
@ -34,6 +34,7 @@ redis(host:port, key, structure[, db_index[, password[, pool_size]]])
|
||||
|
||||
- queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation.
|
||||
|
||||
[Named collections](/docs/en/operations/named-collections.md) are not supported for `redis` table function at the moment.
|
||||
|
||||
**Returned Value**
|
||||
|
||||
@ -41,17 +42,7 @@ A table object with key as Redis key, other columns packaged together as Redis v
|
||||
|
||||
## Usage Example {#usage-example}
|
||||
|
||||
Create a table in ClickHouse which allows to read data from Redis:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE redis_table
|
||||
(
|
||||
`k` String,
|
||||
`m` String,
|
||||
`n` UInt32
|
||||
)
|
||||
ENGINE = Redis('redis1:6379') PRIMARY KEY(k);
|
||||
```
|
||||
Read from Redis:
|
||||
|
||||
```sql
|
||||
SELECT * FROM redis(
|
||||
@ -61,6 +52,15 @@ SELECT * FROM redis(
|
||||
)
|
||||
```
|
||||
|
||||
Insert into Redis:
|
||||
|
||||
```sql
|
||||
INSERT INTO TABLE FUNCTION redis(
|
||||
'redis1:6379',
|
||||
'key',
|
||||
'key String, v1 String, v2 UInt32') values ('1', '1', 1);
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [The `Redis` table engine](/docs/en/engines/table-engines/integrations/redis.md)
|
||||
|
@ -13,10 +13,12 @@ Both functions can be used in `SELECT` and `INSERT` queries.
|
||||
## Syntax
|
||||
|
||||
``` sql
|
||||
remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
|
||||
remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
|
||||
remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key])
|
||||
remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
|
||||
remote(addresses_expr, [db, table, user [, password], sharding_key])
|
||||
remote(addresses_expr, [db.table, user [, password], sharding_key])
|
||||
remote(named_collection[, option=value [,..]])
|
||||
remoteSecure(addresses_expr, [db, table, user [, password], sharding_key])
|
||||
remoteSecure(addresses_expr, [db.table, user [, password], sharding_key])
|
||||
remoteSecure(named_collection[, option=value [,..]])
|
||||
```
|
||||
|
||||
## Parameters
|
||||
@ -39,6 +41,8 @@ remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key])
|
||||
- `password` — User password. If not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `sharding_key` — Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
|
||||
|
||||
Arguments also can be passed using [named collections](/docs/en/operations/named-collections.md).
|
||||
|
||||
## Returned value
|
||||
|
||||
A table located on a remote server.
|
||||
@ -82,7 +86,16 @@ example01-01-1,example01-02-1
|
||||
SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3;
|
||||
```
|
||||
|
||||
### Inserting data from a remote server into a table:
|
||||
Or using [named collections](/docs/en/operations/named-collections.md):
|
||||
|
||||
```sql
|
||||
CREATE NAMED COLLECTION creds AS
|
||||
host = '127.0.0.1',
|
||||
database = 'db';
|
||||
SELECT * FROM remote(creds, table='remote_engine_table') LIMIT 3;
|
||||
```
|
||||
|
||||
### Inserting data into a table on a remote server:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory;
|
||||
|
@ -16,33 +16,41 @@ When using the `s3 table function` with [`INSERT INTO...SELECT`](../../sql-refer
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
s3(path [, NOSIGN | aws_access_key_id, aws_secret_access_key [,session_token]] [,format] [,structure] [,compression])
|
||||
s3(url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method])
|
||||
s3(named_collection[, option=value [,..]])
|
||||
```
|
||||
|
||||
:::tip GCS
|
||||
The S3 Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
|
||||
|
||||
For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_id` and `aws_secret_access_key`.
|
||||
For GCS, substitute your HMAC key and HMAC secret where you see `access_key_id` and `secret_access_key`.
|
||||
:::
|
||||
|
||||
**Arguments**
|
||||
**Parameters**
|
||||
|
||||
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
|
||||
`s3` table function supports the following plain parameters:
|
||||
|
||||
- `url` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
|
||||
:::note GCS
|
||||
The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
|
||||
The GCS url is in this format as the endpoint for the Google XML API is different than the JSON API:
|
||||
```
|
||||
https://storage.googleapis.com/<bucket>/<folder>/<filename(s)>
|
||||
```
|
||||
and not ~~https://storage.cloud.google.com~~.
|
||||
:::
|
||||
|
||||
- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
|
||||
- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional.
|
||||
- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed.
|
||||
- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional.
|
||||
- `session_token` - Session token to use with the given keys. Optional when passing keys.
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
|
||||
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension.
|
||||
|
||||
Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:
|
||||
|
||||
- `filename` — appended to the url if specified.
|
||||
- `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`.
|
||||
- `no_sign_request` — disabled by default.
|
||||
- `expiration_window_seconds` — default value is 120.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -82,7 +90,7 @@ FROM s3(
|
||||
LIMIT 5;
|
||||
```
|
||||
|
||||
ClickHouse also can determine the compression of the file. For example, if the file was zipped up with a `.csv.gz` extension, ClickHouse would decompress the file automatically.
|
||||
ClickHouse also can determine the compression method of the file. For example, if the file was zipped up with a `.csv.gz` extension, ClickHouse would decompress the file automatically.
|
||||
:::
|
||||
|
||||
|
||||
@ -168,7 +176,7 @@ The below get data from all `test-data.csv.gz` files from any folder inside `my-
|
||||
SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
|
||||
```
|
||||
|
||||
Note. It is possible to specify custom URL mappers in the server configuration file. Example:
|
||||
Note. It is possible to specify custom URL mappers in the server configuration file. Example:
|
||||
``` sql
|
||||
SELECT * FROM s3('s3://clickhouse-public-datasets/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
|
||||
```
|
||||
@ -190,6 +198,16 @@ Custom mapper can be added into `config.xml`:
|
||||
</url_scheme_mappers>
|
||||
```
|
||||
|
||||
For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). Here is the example:
|
||||
``` sql
|
||||
|
||||
CREATE NAMED COLLECTION creds AS
|
||||
access_key_id = '***',
|
||||
secret_access_key = '***';
|
||||
SELECT count(*)
|
||||
FROM s3(creds, url='https://s3-object-url.csv')
|
||||
```
|
||||
|
||||
## Partitioned Write
|
||||
|
||||
If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency.
|
||||
|
@ -4,23 +4,34 @@ sidebar_position: 181
|
||||
sidebar_label: s3Cluster
|
||||
title: "s3Cluster Table Function"
|
||||
---
|
||||
This is an extension to the [s3](/docs/en/sql-reference/table-functions/s3.md) table function.
|
||||
|
||||
Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) and Google Cloud Storage [Google Cloud Storage](https://cloud.google.com/storage/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
s3Cluster(cluster_name, source, [,access_key_id, secret_access_key, [session_token]] [,format] [,structure])
|
||||
s3Cluster(cluster_name, url [, NOSIGN | access_key_id, secret_access_key, [session_token]] [,format] [,structure] [,compression_method])
|
||||
s3Cluster(cluster_name, named_collection[, option=value [,..]])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
|
||||
- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
|
||||
- `access_key_id`, `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional.
|
||||
- `url` — path to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
|
||||
- `NOSIGN` — If this keyword is provided in place of credentials, all the requests will not be signed.
|
||||
- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional.
|
||||
- `session_token` - Session token to use with the given keys. Optional when passing keys.
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression_method` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression method by file extension.
|
||||
|
||||
Arguments can also be passed using [named collections](/docs/en/operations/named-collections.md). In this case `url`, `access_key_id`, `secret_access_key`, `format`, `structure`, `compression_method` work in the same way, and some extra parameters are supported:
|
||||
|
||||
- `filename` — appended to the url if specified.
|
||||
- `use_environment_credentials` — enabled by default, allows passing extra parameters using environment variables `AWS_CONTAINER_CREDENTIALS_RELATIVE_URI`, `AWS_CONTAINER_CREDENTIALS_FULL_URI`, `AWS_CONTAINER_AUTHORIZATION_TOKEN`, `AWS_EC2_METADATA_DISABLED`.
|
||||
- `no_sign_request` — disabled by default.
|
||||
- `expiration_window_seconds` — default value is 120.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -47,6 +58,18 @@ Count the total amount of rows in all files in the cluster `cluster_simple`:
|
||||
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
|
||||
:::
|
||||
|
||||
For production use cases it is recommended to use [named collections](/docs/en/operations/named-collections.md). Here is the example:
|
||||
``` sql
|
||||
|
||||
CREATE NAMED COLLECTION creds AS
|
||||
access_key_id = 'minio'
|
||||
secret_access_key = 'minio123';
|
||||
SELECT count(*) FROM s3Cluster(
|
||||
'cluster_simple', creds, url='https://s3-object-url.csv',
|
||||
format='CSV', structure='name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'
|
||||
)
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [S3 engine](../../engines/table-engines/integrations/s3.md)
|
||||
|
@ -670,4 +670,4 @@ ENGINE = ReplicatedMergeTree(
|
||||
INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local;
|
||||
```
|
||||
|
||||
Репликация работает в режиме мультимастера. Это означает, что данные могут быть загружены на любую из реплик и система автоматически синхронизирует данные между остальными репликами. Репликация асинхронна, то есть в конкретный момент времнени не все реплики могут содержать недавно добавленные данные. Как минимум одна реплика должна быть в строю для приёма данных. Прочие реплики синхронизируются и восстановят согласованное состояния как только снова станут активными. Заметим, что при таком подходе есть вероятность утраты недавно добавленных данных.
|
||||
Репликация работает в режиме мультимастера. Это означает, что данные могут быть загружены на любую из реплик и система автоматически синхронизирует данные между остальными репликами. Репликация асинхронна, то есть в конкретный момент времени не все реплики могут содержать недавно добавленные данные. Как минимум одна реплика должна быть в строю для приёма данных. Прочие реплики синхронизируются и восстановят согласованное состояния как только снова станут активными. Заметим, что при таком подходе есть вероятность утраты недавно добавленных данных.
|
||||
|
@ -2796,6 +2796,17 @@ SELECT TOP 3 name, value FROM system.settings;
|
||||
3. │ max_block_size │ 65505 │
|
||||
└─────────────────────────┴─────────┘
|
||||
```
|
||||
### output_format_pretty_color {#output_format_pretty_color}
|
||||
|
||||
Включает/выключает управляющие последовательности ANSI в форматах Pretty.
|
||||
|
||||
Возможные значения:
|
||||
|
||||
- `0` — выключена. Не исползует ANSI последовательности в форматах Pretty.
|
||||
- `1` — включена. Исползует ANSI последовательности с исключением форматов `NoEscapes`.
|
||||
- `auto` - включена если `stdout` является терминалом с исключением форматов `NoEscapes`.
|
||||
|
||||
Значение по умолчанию: `auto`
|
||||
|
||||
## system_events_show_zero_values {#system_events_show_zero_values}
|
||||
|
||||
|
@ -49,7 +49,7 @@ slug: /ru/operations/system-tables/replication_queue
|
||||
|
||||
- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время последней попытки выполнить задачу.
|
||||
|
||||
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество отложенных задач.
|
||||
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество откладываний запуска задачи.
|
||||
|
||||
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — причина, по которой была отложена задача.
|
||||
|
||||
|
@ -280,7 +280,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge
|
||||
Ждет когда таблица семейства `ReplicatedMergeTree` будет синхронизирована с другими репликами в кластере, но не более `receive_timeout` секунд:
|
||||
|
||||
``` sql
|
||||
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
|
||||
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
|
||||
```
|
||||
|
||||
После выполнения этого запроса таблица `[db.]replicated_merge_tree_family_table_name` загружает команды из общего реплицированного лога в свою собственную очередь репликации. Затем запрос ждет, пока реплика не обработает все загруженные команды. Поддерживаются следующие модификаторы:
|
||||
|
@ -1,27 +1,27 @@
|
||||
---
|
||||
slug: /zh/faq/general/ne-tormozit
|
||||
title: "What does \u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\
|
||||
\u201D mean?"
|
||||
title: "\u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\
|
||||
\u201D 是什么意思?"
|
||||
toc_hidden: true
|
||||
sidebar_position: 11
|
||||
---
|
||||
|
||||
# What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean}
|
||||
# “Не тормозит” 是什么意思? {#what-does-ne-tormozit-mean}
|
||||
|
||||
This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front.
|
||||
这个问题通常出现在人们看到官方 ClickHouse T恤时。它们的正面印有大字**“ClickHouse не тормозит”**。
|
||||
|
||||
Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is.
|
||||
在 ClickHouse 开源之前,它作为俄罗斯最大的 IT 公司 [Yandex](https://yandex.com/company/) 的内部存储系统而开发。这就是为什么它最初获得了俄文口号“не тормозит”(发音为“ne tormozit”)。在开源发布后,我们首先为俄罗斯的活动制作了一些这样的T恤,使用原汁原味的口号是理所当然的。
|
||||
|
||||
One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it.
|
||||
其中一批这样的T恤原本打算在俄罗斯之外的活动中赠送,我们尝试制作口号的英文版本。不幸的是,俄语在表达方面有些优雅,而且T恤上的空间有限,所以我们未能提出足够好的翻译(大多数选项要么太长,要么不够准确),并决定即使在为国际活动制作的T恤上也保留俄文口号。这被证明是一个绝妙的决定,因为全世界的人们看到它时都会感到惊喜和好奇。
|
||||
|
||||
So, what does it mean? Here are some ways to translate *“не тормозит”*:
|
||||
那么,它是什么意思呢?以下是翻译“не тормозит”的一些方式:
|
||||
|
||||
- If you translate it literally, it’d be something like *“ClickHouse does not press the brake pedal”*.
|
||||
- If you’d want to express it as close to how it sounds to a Russian person with IT background, it’d be something like *“If your larger system lags, it’s not because it uses ClickHouse”*.
|
||||
- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse does not lag”* or just *“ClickHouse is fast”*.
|
||||
- 如果你直译,那就是“ClickHouse 不踩刹车”。
|
||||
- 如果你想尽可能接近一个有 IT 背景的俄罗斯人的听觉感受,那就是“如果你的大型系统延迟,不是因为它使用了 ClickHouse”。
|
||||
- 更短,但不那么精确的版本可能是“ClickHouse 不慢”,“ClickHouse 不卡顿”或仅仅“ClickHouse 很快”。
|
||||
|
||||
If you haven’t seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one:
|
||||
如果您还没有亲眼见过这些 T恤,可以在许多与 ClickHouse 相关的视频中在线查看。例如,这个:
|
||||
|
||||
![iframe](https://www.youtube.com/embed/bSyQahMVZ7w)
|
||||
|
||||
P.S. These t-shirts are not for sale, they are given away for free on most [ClickHouse Meetups](https://clickhouse.com/#meet), usually for best questions or other forms of active participation.
|
||||
附言:这些 T恤不出售,它们在大多数 [ClickHouse 聚会](https://clickhouse.com/#meet)上免费赠送,通常是给出最佳问题或其他形式的积极参与者。
|
||||
|
@ -1,63 +1,63 @@
|
||||
---
|
||||
slug: /zh/faq/general/why-clickhouse-is-so-fast
|
||||
title: Why is ClickHouse so fast?
|
||||
title: 为什么 ClickHouse 如此快速?
|
||||
toc_hidden: true
|
||||
sidebar_position: 8
|
||||
---
|
||||
|
||||
# Why ClickHouse Is So Fast? {#why-clickhouse-is-so-fast}
|
||||
# 为什么 ClickHouse 如此快速? {#why-clickhouse-is-so-fast}
|
||||
|
||||
It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system.
|
||||
它被设计成一个快速的系统。在开发过程中,查询执行性能一直是首要考虑的优先级,但也考虑了其他重要特性,如用户友好性、可扩展性和安全性,使 ClickHouse 成为一个真正的生产系统。
|
||||
|
||||
ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That’s what needs to be done to build a typical analytical report and that’s what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible:
|
||||
ClickHouse 最初是作为一个原型构建的,它的单一任务就是尽可能快速地过滤和聚合数据。这正是构建典型分析报告所需做的,也是典型 [GROUP BY](../../sql-reference/statements/select/group-by.md) 查询所做的。ClickHouse 团队做出了几个高层次的决策,这些决策组合在一起使得实现这一任务成为可能:
|
||||
|
||||
Column-oriented storage
|
||||
: Source data often contain hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, or most expensive disk read operations would be wasted.
|
||||
列式存储
|
||||
: 源数据通常包含数百甚至数千列,而报告可能只使用其中的几列。系统需要避免读取不必要的列,否则大部分昂贵的磁盘读取操作将被浪费。
|
||||
|
||||
Indexes
|
||||
: ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns.
|
||||
索引
|
||||
: ClickHouse 在内存中保留数据结构,允许不仅读取使用的列,而且只读取这些列的必要行范围。
|
||||
|
||||
Data compression
|
||||
: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create/table.mdx/#create-query-specialized-codecs) that can make data even more compact.
|
||||
数据压缩
|
||||
: 将同一列的不同值存储在一起通常会导致更好的压缩比(与行式系统相比),因为在实际数据中列通常对相邻行有相同或不太多的不同值。除了通用压缩之外,ClickHouse 还支持 [专用编解码器](../../sql-reference/statements/create/table.mdx/#create-query-specialized-codecs),可以使数据更加紧凑。
|
||||
|
||||
Vectorized query execution
|
||||
: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage.
|
||||
向量化查询执行
|
||||
: ClickHouse 不仅以列的形式存储数据,而且以列的形式处理数据。这导致更好的 CPU 缓存利用率,并允许使用 [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU 指令。
|
||||
|
||||
Scalability
|
||||
: ClickHouse can leverage all available CPU cores and disks to execute even a single query. Not only on a single server but all CPU cores and disks of a cluster as well.
|
||||
可扩展性
|
||||
: ClickHouse 可以利用所有可用的 CPU 核心和磁盘来执行甚至是单个查询。不仅在单个服务器上,而且在集群的所有 CPU 核心和磁盘上。
|
||||
|
||||
But many other database management systems use similar techniques. What really makes ClickHouse stand out is **attention to low-level details**. Most programming languages provide implementations for most common algorithms and data structures, but they tend to be too generic to be effective. Every task can be considered as a landscape with various characteristics, instead of just throwing in random implementation. For example, if you need a hash table, here are some key questions to consider:
|
||||
但许多其他数据库管理系统也使用类似的技术。真正使 ClickHouse 脱颖而出的是 **对底层细节的关注**。大多数编程语言为最常见的算法和数据结构提供了实现,但它们往往过于通用而无法高效。每个任务都可以被视为具有各种特征的景观,而不是仅仅随意投入某个实现。例如,如果您需要一个哈希表,这里有一些关键问题需要考虑:
|
||||
|
||||
- Which hash function to choose?
|
||||
- Collision resolution algorithm: [open addressing](https://en.wikipedia.org/wiki/Open_addressing) vs [chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)?
|
||||
- Memory layout: one array for keys and values or separate arrays? Will it store small or large values?
|
||||
- Fill factor: when and how to resize? How to move values around on resize?
|
||||
- Will values be removed and which algorithm will work better if they will?
|
||||
- Will we need fast probing with bitmaps, inline placement of string keys, support for non-movable values, prefetch, and batching?
|
||||
- 选择哪种哈希函数?
|
||||
- 冲突解决算法:[开放寻址](https://en.wikipedia.org/wiki/Open_addressing)还是[链接](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)?
|
||||
- 内存布局:一个数组用于键和值还是分开的数组?它会存储小值还是大值?
|
||||
- 填充因子:何时以及如何调整大小?在调整大小时如何移动值?
|
||||
- 是否会移除值,如果会,哪种算法会更好?
|
||||
- 我们是否需要使用位图进行快速探测,字符串键的内联放置,对不可移动值的支持,预取和批处理?
|
||||
|
||||
Hash table is a key data structure for `GROUP BY` implementation and ClickHouse automatically chooses one of [30+ variations](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) for each specific query.
|
||||
哈希表是 `GROUP BY` 实现的关键数据结构,ClickHouse 会根据每个特定查询自动选择 [30 多种变体](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) 中的一种。
|
||||
|
||||
The same goes for algorithms, for example, in sorting you might consider:
|
||||
算法也是如此,例如,在排序中,您可能会考虑:
|
||||
|
||||
- What will be sorted: an array of numbers, tuples, strings, or structures?
|
||||
- Is all data available completely in RAM?
|
||||
- Do we need a stable sort?
|
||||
- Do we need a full sort? Maybe partial sort or n-th element will suffice?
|
||||
- How to implement comparisons?
|
||||
- Are we sorting data that has already been partially sorted?
|
||||
- 将要排序的是数字数组、元组、字符串还是结构?
|
||||
- 所有数据是否完全可用于 RAM?
|
||||
- 我们需要稳定排序吗?
|
||||
- 我们需要完全排序吗?也许部分排序或第 n 个元素就足够了?
|
||||
- 如何实现比较?
|
||||
- 我们正在对已经部分排序的数据进行排序吗?
|
||||
|
||||
Algorithms that they rely on characteristics of data they are working with can often do better than their generic counterparts. If it is not really known in advance, the system can try various implementations and choose the one that works best in runtime. For example, see an [article on how LZ4 decompression is implemented in ClickHouse](https://habr.com/en/company/yandex/blog/457612/).
|
||||
他们所依赖的算法根据其所处理的数据特性,往往可以比通用算法做得更好。如果事先真的不知道,系统可以尝试各种实现,并在运行时选择最佳的一种。例如,看一篇关于 [ClickHouse 中 LZ4 解压缩是如何实现的文章](https://habr.com/en/company/yandex/blog/457612/)。
|
||||
|
||||
Last but not least, the ClickHouse team always monitors the Internet on people claiming that they came up with the best implementation, algorithm, or data structure to do something and tries it out. Those claims mostly appear to be false, but from time to time you’ll indeed find a gem.
|
||||
最后但同样重要的是,ClickHouse 团队始终关注互联网上人们声称他们提出了最佳的实现、算法或数据结构来做某事,并尝试它。这些声称大多是虚假的,但有时你确实会找到一颗宝石。
|
||||
|
||||
:::info Tips for building your own high-performance software
|
||||
- Keep in mind low-level details when designing your system.
|
||||
- Design based on hardware capabilities.
|
||||
- Choose data structures and abstractions based on the needs of the task.
|
||||
- Provide specializations for special cases.
|
||||
- Try new, “best” algorithms, that you read about yesterday.
|
||||
- Choose an algorithm in runtime based on statistics.
|
||||
- Benchmark on real datasets.
|
||||
- Test for performance regressions in CI.
|
||||
- Measure and observe everything.
|
||||
:::info 构建高性能软件的提示
|
||||
- 设计系统时要考虑到底层细节。
|
||||
- 基于硬件能力进行设计。
|
||||
- 根据任务的需求选择数据结构和抽象。
|
||||
- 为特殊情况提供专门化。
|
||||
- 尝试您昨天阅读的关于新的“最佳”算法。
|
||||
- 根据统计数据在运行时选择算法。
|
||||
- 在真实数据集上进行基准测试。
|
||||
- 在 CI 中测试性能回归。
|
||||
- 测量并观察一切。
|
||||
:::
|
||||
|
@ -1,35 +1,35 @@
|
||||
---
|
||||
slug: /zh/faq/integration/json-import
|
||||
title: How to import JSON into ClickHouse?
|
||||
title: 如何将 JSON 导入到 ClickHouse?
|
||||
toc_hidden: true
|
||||
sidebar_position: 11
|
||||
---
|
||||
|
||||
# How to Import JSON Into ClickHouse? {#how-to-import-json-into-clickhouse}
|
||||
# 如何将 JSON 导入到 ClickHouse? {#how-to-import-json-into-clickhouse}
|
||||
|
||||
ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline.
|
||||
ClickHouse 支持多种[输入和输出的数据格式](../../interfaces/formats.md)。其中包括多种 JSON 变体,但最常用于数据导入的是 [JSONEachRow](../../interfaces/formats.md#jsoneachrow)。它期望每行一个 JSON 对象,每个对象由一个新行分隔。
|
||||
|
||||
## Examples {#examples}
|
||||
## 示例 {#examples}
|
||||
|
||||
Using [HTTP interface](../../interfaces/http.md):
|
||||
使用 [HTTP 接口](../../interfaces/http.md):
|
||||
|
||||
``` bash
|
||||
$ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @-
|
||||
```
|
||||
|
||||
Using [CLI interface](../../interfaces/cli.md):
|
||||
使用 [CLI接口](../../interfaces/cli.md):
|
||||
|
||||
``` bash
|
||||
$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow"
|
||||
```
|
||||
|
||||
Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead.
|
||||
除了手动插入数据外,您可能会考虑使用 [客户端库](../../interfaces/index.md) 之一。
|
||||
|
||||
## Useful Settings {#useful-settings}
|
||||
## 实用设置 {#useful-settings}
|
||||
|
||||
- `input_format_skip_unknown_fields` allows to insert JSON even if there were additional fields not present in table schema (by discarding them).
|
||||
- `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type.
|
||||
- `input_format_skip_unknown_fields` 允许插入 JSON,即使存在表格架构中未出现的额外字段(通过丢弃它们)。
|
||||
- `input_format_import_nested_json` 允许将嵌套 JSON 对象插入到 [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) 类型的列中。
|
||||
|
||||
:::note
|
||||
Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface.
|
||||
对于 HTTP 接口,设置作为 `GET` 参数指定;对于 `CLI` 接口,则作为前缀为 -- 的附加命令行参数。
|
||||
:::
|
@ -1,16 +1,16 @@
|
||||
---
|
||||
slug: /zh/faq/integration/oracle-odbc
|
||||
title: What if I have a problem with encodings when using Oracle via ODBC?
|
||||
title: 使用 Oracle ODBC 时遇到编码问题怎么办?
|
||||
toc_hidden: true
|
||||
sidebar_position: 20
|
||||
---
|
||||
|
||||
# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings}
|
||||
# 使用 Oracle ODBC 时遇到编码问题怎么办? {#oracle-odbc-encodings}
|
||||
|
||||
If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
|
||||
如果您使用 Oracle 作为 ClickHouse 外部字典的数据源,并通过 Oracle ODBC 驱动程序,您需要在 `/etc/default/clickhouse` 中为 `NLS_LANG` 环境变量设置正确的值。更多信息,请参阅 [Oracle NLS_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html)。
|
||||
|
||||
**Example**
|
||||
**示例**
|
||||
|
||||
``` sql
|
||||
NLS_LANG=RUSSIAN_RUSSIA.UTF8
|
||||
```
|
||||
```
|
@ -1,44 +1,44 @@
|
||||
---
|
||||
slug: /zh/faq/operations/delete-old-data
|
||||
title: Is it possible to delete old records from a ClickHouse table?
|
||||
title: 是否可以从ClickHouse表中删除旧记录?
|
||||
toc_hidden: true
|
||||
sidebar_position: 20
|
||||
---
|
||||
|
||||
# Is It Possible to Delete Old Records from a ClickHouse Table? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table}
|
||||
# 是否可以从ClickHouse表中删除旧记录? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table}
|
||||
|
||||
The short answer is “yes”. ClickHouse has multiple mechanisms that allow freeing up disk space by removing old data. Each mechanism is aimed for different scenarios.
|
||||
简短的答案是“可以”。ClickHouse具有多种机制,允许通过删除旧数据来释放磁盘空间。每种机制都针对不同的场景。
|
||||
|
||||
## TTL {#ttl}
|
||||
|
||||
ClickHouse allows to automatically drop values when some condition happens. This condition is configured as an expression based on any columns, usually just static offset for any timestamp column.
|
||||
ClickHouse 允许在某些条件发生时自动删除值。这个条件被配置为基于任何列的表达式,通常只是针对任何时间戳列的静态偏移量。
|
||||
|
||||
The key advantage of this approach is that it does not need any external system to trigger, once TTL is configured, data removal happens automatically in background.
|
||||
这种方法的主要优势是它不需要任何外部系统来触发,一旦配置了 TTL,数据删除就会自动在后台发生。
|
||||
|
||||
:::note
|
||||
TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD.
|
||||
TTL 也可以用来将数据移动到非 [/dev/null](https://en.wikipedia.org/wiki/Null_device) 的不同存储系统,例如从 SSD 到 HDD。
|
||||
:::
|
||||
|
||||
More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
|
||||
有关 [配置 TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的更多详细信息。
|
||||
|
||||
## ALTER DELETE {#alter-delete}
|
||||
|
||||
ClickHouse does not have real-time point deletes like in [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) databases. The closest thing to them are mutations. They are issued as `ALTER ... DELETE` or `ALTER ... UPDATE` queries to distinguish from normal `DELETE` or `UPDATE` as they are asynchronous batch operations, not immediate modifications. The rest of syntax after `ALTER TABLE` prefix is similar.
|
||||
ClickHouse没有像[OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing)数据库那样的实时点删除。最接近的东西是 `Mutation`,执行 `ALTER ... DELETE` 或 `ALTER ... UPDATE` 查询,以区别于普通的`DELETE`或`UPDATE`。因为它们是异步批处理操作,而不是立即修改。`ALTER TABLE`前缀后的其余语法相似。
|
||||
|
||||
`ALTER DELETE` can be issued to flexibly remove old data. If you need to do it regularly, the main downside will be the need to have an external system to submit the query. There are also some performance considerations since mutation rewrite complete parts even there’s only a single row to be deleted.
|
||||
`ALTER DELETE`可以灵活地用来删除旧数据。如果你需要定期这样做,主要缺点将是需要有一个外部系统来提交查询。还有一些性能方面的考虑,因为即使只有一行要被删除,突变也会重写完整部分。
|
||||
|
||||
This is the most common approach to make your system based on ClickHouse [GDPR](https://gdpr-info.eu)-compliant.
|
||||
这是使基于ClickHouse的系统符合[GDPR](https://gdpr-info.eu)的最常见方法。
|
||||
|
||||
More details on [mutations](../../sql-reference/statements/alter.md/#alter-mutations).
|
||||
有关[mutations](../../sql-reference/statements/alter.md/#alter-mutations)的更多详细信息。
|
||||
|
||||
## DROP PARTITION {#drop-partition}
|
||||
|
||||
`ALTER TABLE ... DROP PARTITION` provides a cost-efficient way to drop a whole partition. It’s not that flexible and needs proper partitioning scheme configured on table creation, but still covers most common cases. Like mutations need to be executed from an external system for regular use.
|
||||
`ALTER TABLE ... DROP PARTITION`提供了一种成本效率高的方式来删除整个分区。它不是那么灵活,需要在创建表时配置适当的分区方案,但仍然涵盖了大多数常见情况。像 mutations 一样,需要从外部系统执行以进行常规使用。
|
||||
|
||||
More details on [manipulating partitions](../../sql-reference/statements/alter/partition.mdx/#alter_drop-partition).
|
||||
有关[操作分区](../../sql-reference/statements/alter/partition.mdx/#alter_drop-partition)的更多详细信息。
|
||||
|
||||
## TRUNCATE {#truncate}
|
||||
|
||||
It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need.
|
||||
从表中删除所有数据是相当激进的,但在某些情况下可能正是您所需要的。
|
||||
|
||||
More details on [table truncation](../../sql-reference/statements/truncate.md).
|
||||
有关[truncate](../../sql-reference/statements/truncate.md)的更多详细信息。
|
||||
|
@ -248,7 +248,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge
|
||||
|
||||
|
||||
``` sql
|
||||
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
|
||||
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
|
||||
```
|
||||
|
||||
### RESTART REPLICA {#query_language-system-restart-replica}
|
||||
|
@ -1010,6 +1010,7 @@ void Client::addOptions(OptionsDescription & options_description)
|
||||
("config,c", po::value<std::string>(), "config-file path (another shorthand)")
|
||||
("connection", po::value<std::string>(), "connection to use (from the client config), by default connection name is hostname")
|
||||
("secure,s", "Use TLS connection")
|
||||
("no-secure,s", "Don't use TLS connection")
|
||||
("user,u", po::value<std::string>()->default_value("default"), "user")
|
||||
("password", po::value<std::string>(), "password")
|
||||
("ask-password", "ask-password")
|
||||
@ -1151,6 +1152,8 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
interleave_queries_files = options["interleave-queries-file"].as<std::vector<std::string>>();
|
||||
if (options.count("secure"))
|
||||
config().setBool("secure", true);
|
||||
if (options.count("no-secure"))
|
||||
config().setBool("no-secure", true);
|
||||
if (options.count("user") && !options["user"].defaulted())
|
||||
config().setString("user", options["user"].as<std::string>());
|
||||
if (options.count("password"))
|
||||
|
@ -20,7 +20,7 @@ public:
|
||||
const String & host_id_,
|
||||
const String & proxy_database_name_,
|
||||
ContextMutablePtr context_,
|
||||
Poco::Logger * log_)
|
||||
LoggerRawPtr log_)
|
||||
: WithMutableContext(context_),
|
||||
task_zookeeper_path(task_path_),
|
||||
host_id(host_id_),
|
||||
@ -230,7 +230,7 @@ private:
|
||||
|
||||
bool experimental_use_sample_offset{false};
|
||||
|
||||
Poco::Logger * log;
|
||||
LoggerRawPtr log;
|
||||
|
||||
UInt64 max_table_tries = 3;
|
||||
UInt64 max_shard_partition_tries = 3;
|
||||
|
@ -177,7 +177,7 @@ public:
|
||||
auto watch_callback =
|
||||
[my_stale = stale] (const Coordination::WatchResponse & rsp)
|
||||
{
|
||||
auto logger = &Poco::Logger::get("ClusterCopier");
|
||||
auto logger = getLogger("ClusterCopier");
|
||||
if (rsp.error == Coordination::Error::ZOK)
|
||||
{
|
||||
switch (rsp.type)
|
||||
|
@ -160,7 +160,7 @@ int DisksApp::main(const std::vector<String> & /*args*/)
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specifiged");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specified");
|
||||
}
|
||||
|
||||
if (config().has("save-logs"))
|
||||
|
@ -413,13 +413,13 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
|
||||
switch (operation)
|
||||
{
|
||||
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
|
||||
joining = query->args[1].safeGet<DB::String>();
|
||||
joining = query->args[1].safeGet<String>();
|
||||
break;
|
||||
case static_cast<UInt8>(ReconfigCommand::Operation::REMOVE):
|
||||
leaving = query->args[1].safeGet<DB::String>();
|
||||
leaving = query->args[1].safeGet<String>();
|
||||
break;
|
||||
case static_cast<UInt8>(ReconfigCommand::Operation::SET):
|
||||
new_members = query->args[1].safeGet<DB::String>();
|
||||
new_members = query->args[1].safeGet<String>();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
|
@ -375,7 +375,7 @@ int KeeperClient::main(const std::vector<String> & /* args */)
|
||||
|
||||
if (!config().has("host") && !config().has("port") && !keys.empty())
|
||||
{
|
||||
LOG_INFO(&Poco::Logger::get("KeeperClient"), "Found keeper node in the config.xml, will use it for connection");
|
||||
LOG_INFO(getLogger("KeeperClient"), "Found keeper node in the config.xml, will use it for connection");
|
||||
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
|
@ -28,7 +28,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
|
||||
po::store(po::command_line_parser(argc, argv).options(desc).run(), options);
|
||||
Poco::AutoPtr<Poco::ConsoleChannel> console_channel(new Poco::ConsoleChannel);
|
||||
|
||||
Poco::Logger * logger = &Poco::Logger::get("KeeperConverter");
|
||||
LoggerPtr logger = getLogger("KeeperConverter");
|
||||
logger->setChannel(console_channel);
|
||||
|
||||
if (options.count("help"))
|
||||
|
@ -95,6 +95,7 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp
|
||||
@ -126,15 +127,17 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/registerDiskS3.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp
|
||||
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
|
||||
|
@ -624,7 +624,7 @@ catch (...)
|
||||
|
||||
void Keeper::logRevision() const
|
||||
{
|
||||
LOG_INFO(&Poco::Logger::get("Application"),
|
||||
LOG_INFO(getLogger("Application"),
|
||||
"Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}",
|
||||
VERSION_STRING,
|
||||
ClickHouseRevision::getVersionRevision(),
|
||||
|
@ -13,7 +13,7 @@ CatBoostLibraryHandlerFactory & CatBoostLibraryHandlerFactory::instance()
|
||||
}
|
||||
|
||||
CatBoostLibraryHandlerFactory::CatBoostLibraryHandlerFactory()
|
||||
: log(&Poco::Logger::get("CatBoostLibraryHandlerFactory"))
|
||||
: log(getLogger("CatBoostLibraryHandlerFactory"))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -31,7 +31,7 @@ private:
|
||||
/// map: model path --> catboost library handler
|
||||
std::unordered_map<String, CatBoostLibraryHandlerPtr> library_handlers TSA_GUARDED_BY(mutex);
|
||||
std::mutex mutex;
|
||||
Poco::Logger * log;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -9,40 +9,40 @@ const char DICT_LOGGER_NAME[] = "LibraryDictionarySourceExternal";
|
||||
|
||||
void ExternalDictionaryLibraryAPI::log(LogLevel level, CString msg)
|
||||
{
|
||||
auto & logger = Poco::Logger::get(DICT_LOGGER_NAME);
|
||||
auto logger = getLogger(DICT_LOGGER_NAME);
|
||||
switch (level)
|
||||
{
|
||||
case LogLevel::TRACE:
|
||||
if (logger.trace())
|
||||
logger.trace(msg);
|
||||
if (logger->trace())
|
||||
logger->trace(msg);
|
||||
break;
|
||||
case LogLevel::DEBUG:
|
||||
if (logger.debug())
|
||||
logger.debug(msg);
|
||||
if (logger->debug())
|
||||
logger->debug(msg);
|
||||
break;
|
||||
case LogLevel::INFORMATION:
|
||||
if (logger.information())
|
||||
logger.information(msg);
|
||||
if (logger->information())
|
||||
logger->information(msg);
|
||||
break;
|
||||
case LogLevel::NOTICE:
|
||||
if (logger.notice())
|
||||
logger.notice(msg);
|
||||
if (logger->notice())
|
||||
logger->notice(msg);
|
||||
break;
|
||||
case LogLevel::WARNING:
|
||||
if (logger.warning())
|
||||
logger.warning(msg);
|
||||
if (logger->warning())
|
||||
logger->warning(msg);
|
||||
break;
|
||||
case LogLevel::ERROR:
|
||||
if (logger.error())
|
||||
logger.error(msg);
|
||||
if (logger->error())
|
||||
logger->error(msg);
|
||||
break;
|
||||
case LogLevel::CRITICAL:
|
||||
if (logger.critical())
|
||||
logger.critical(msg);
|
||||
if (logger->critical())
|
||||
logger->critical(msg);
|
||||
break;
|
||||
case LogLevel::FATAL:
|
||||
if (logger.fatal())
|
||||
logger.fatal(msg);
|
||||
if (logger->fatal())
|
||||
logger->fatal(msg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ void ExternalDictionaryLibraryHandlerFactory::create(
|
||||
|
||||
if (library_handlers.contains(dictionary_id))
|
||||
{
|
||||
LOG_WARNING(&Poco::Logger::get("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
|
||||
LOG_WARNING(getLogger("ExternalDictionaryLibraryHandlerFactory"), "Library handler with dictionary id {} already exists", dictionary_id);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@ LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory(
|
||||
size_t keep_alive_timeout_,
|
||||
ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(&Poco::Logger::get(name_))
|
||||
, log(getLogger(name_))
|
||||
, name(name_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
|
@ -19,7 +19,7 @@ public:
|
||||
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
|
||||
|
||||
private:
|
||||
Poco::Logger * log;
|
||||
LoggerPtr log;
|
||||
const std::string name;
|
||||
const size_t keep_alive_timeout;
|
||||
};
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include "CatBoostLibraryHandler.h"
|
||||
#include "CatBoostLibraryHandlerFactory.h"
|
||||
#include "Common/ProfileEvents.h"
|
||||
#include "ExternalDictionaryLibraryHandler.h"
|
||||
#include "ExternalDictionaryLibraryHandlerFactory.h"
|
||||
|
||||
@ -44,9 +45,9 @@ namespace
|
||||
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
|
||||
|
||||
if (!response.sent())
|
||||
*response.send() << message << std::endl;
|
||||
*response.send() << message << '\n';
|
||||
|
||||
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message));
|
||||
LOG_WARNING(getLogger("LibraryBridge"), fmt::runtime(message));
|
||||
}
|
||||
|
||||
std::shared_ptr<Block> parseColumns(String && column_string)
|
||||
@ -91,12 +92,12 @@ static void writeData(Block data, OutputFormatPtr format)
|
||||
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
, log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
|
||||
{
|
||||
LOG_TRACE(log, "Request URI: {}", request.getURI());
|
||||
HTMLForm params(getContext()->getSettingsRef(), request);
|
||||
@ -379,12 +380,12 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(&Poco::Logger::get("ExternalDictionaryLibraryBridgeExistsHandler"))
|
||||
, log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -418,12 +419,12 @@ CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
|
||||
size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(&Poco::Logger::get("CatBoostLibraryBridgeRequestHandler"))
|
||||
, log(getLogger("CatBoostLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
|
||||
{
|
||||
LOG_TRACE(log, "Request URI: {}", request.getURI());
|
||||
HTMLForm params(getContext()->getSettingsRef(), request);
|
||||
@ -463,6 +464,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
|
||||
{
|
||||
if (method == "catboost_list")
|
||||
{
|
||||
auto & read_buf = request.getStream();
|
||||
params.read(read_buf);
|
||||
|
||||
ExternalModelInfos model_infos = CatBoostLibraryHandlerFactory::instance().getModelInfos();
|
||||
|
||||
writeIntBinary(static_cast<UInt64>(model_infos.size()), out);
|
||||
@ -500,6 +504,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
|
||||
}
|
||||
else if (method == "catboost_removeAllModels")
|
||||
{
|
||||
auto & read_buf = request.getStream();
|
||||
params.read(read_buf);
|
||||
|
||||
CatBoostLibraryHandlerFactory::instance().removeAllModels();
|
||||
|
||||
String res = "1";
|
||||
@ -616,12 +623,12 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
|
||||
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(&Poco::Logger::get("CatBoostLibraryBridgeExistsHandler"))
|
||||
, log(getLogger("CatBoostLibraryBridgeExistsHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
|
||||
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -20,13 +20,13 @@ class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler,
|
||||
public:
|
||||
ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
static constexpr inline auto FORMAT = "RowBinary";
|
||||
|
||||
const size_t keep_alive_timeout;
|
||||
Poco::Logger * log;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
|
||||
@ -36,11 +36,11 @@ class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler,
|
||||
public:
|
||||
ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
Poco::Logger * log;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
|
||||
@ -65,11 +65,11 @@ class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithConte
|
||||
public:
|
||||
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
Poco::Logger * log;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
|
||||
@ -79,11 +79,11 @@ class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContex
|
||||
public:
|
||||
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
Poco::Logger * log;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user