Website Dockerfile fix

This commit is contained in:
Ivan Blinkov 2019-03-26 16:29:02 +03:00
commit f669f0ce23
15 changed files with 124 additions and 42 deletions

View File

@ -1,7 +1,33 @@
if (HAVE_SSSE3)
option (ENABLE_HYPERSCAN "Enable hyperscan" ON)
endif ()
if (ENABLE_HYPERSCAN)
option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt")
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive")
endif ()
set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1)
set (USE_INTERNAL_HYPERSCAN_LIBRARY 0)
endif ()
if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
find_library (HYPERSCAN_LIBRARY hs)
find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
endif ()
if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR)
set (USE_HYPERSCAN 1)
elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY)
set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src)
set (HYPERSCAN_LIBRARY hs)
set (USE_HYPERSCAN 1)
set (USE_INTERNAL_HYPERSCAN_LIBRARY 1)
message (STATUS "Using hyperscan: ${HYPERSCAN_INCLUDE_DIR} " : ${HYPERSCAN_LIBRARY})
endif()
message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}")
endif ()

View File

@ -305,6 +305,6 @@ if (USE_BASE64)
add_subdirectory (base64-cmake)
endif()
if (USE_HYPERSCAN)
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
add_subdirectory (hyperscan)
endif()

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 32abf16beb7bb8b243a4d100ccdd6acb271738c4
Subproject commit 471ea208abb92a5cba7d3a08a819bb728f27e95f

View File

@ -516,7 +516,7 @@ public:
template <typename ResultType, typename CountCharsCallback>
void searchFirstPosition(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const CountCharsCallback & count_chars_callback, ResultType & ans)
{
auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t
auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> UInt64
{
return this->searchOneFirstPosition(haystack, haystack_end, count_chars_callback);
};
@ -676,11 +676,11 @@ private:
}
template <typename CountCharsCallback>
inline size_t searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const
inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const
{
const size_t fallback_size = fallback_needles.size();
size_t ans = std::numeric_limits<size_t>::max();
UInt64 ans = std::numeric_limits<UInt64>::max();
for (size_t i = 0; i < fallback_size; ++i)
if (auto pos = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); pos != haystack_end)
@ -705,7 +705,7 @@ private:
}
}
}
if (ans == std::numeric_limits<size_t>::max())
if (ans == std::numeric_limits<UInt64>::max())
return 0;
return ans;
}

View File

@ -24,6 +24,7 @@
#cmakedefine01 USE_CPUINFO
#cmakedefine01 USE_BROTLI
#cmakedefine01 USE_SSL
#cmakedefine01 USE_HYPERSCAN
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 LLVM_HAS_RTTI

View File

@ -1,8 +1,6 @@
#include <Functions/FunctionsStringSearch.h>
#include "FunctionsStringSearch.h"
#include <Columns/ColumnFixedString.h>
#include <Common/config.h>
#include <DataTypes/DataTypeFixedString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/Regexps.h>
@ -11,12 +9,16 @@
#include <re2/stringpiece.h>
#include <Poco/UTF8String.h>
#include <Common/Volnitsky.h>
#include <algorithm>
#include <memory>
#ifdef __SSSE3__
# include <hs.h>
#include <Common/config.h>
#if USE_HYPERSCAN
# if __has_include(<hs/hs.h>)
# include <hs/hs.h>
# else
# include <hs.h>
# endif
#endif
#if USE_RE2_ST
@ -617,7 +619,7 @@ struct MultiMatchAnyImpl
{
(void)FindAny;
(void)FindAnyIndex;
#ifdef __SSSE3__
#if USE_HYPERSCAN
using ScratchPtr = std::unique_ptr<hs_scratch_t, DB::MultiRegexps::HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
const auto & hyperscan_regex = MultiRegexps::get<FindAnyIndex>(needles);
@ -670,7 +672,7 @@ struct MultiMatchAnyImpl
res[i] = j + 1;
}
}
#endif // __SSSE3__
#endif // USE_HYPERSCAN
}
};

View File

@ -5,13 +5,17 @@
#include <Common/OptimizedRegularExpression.h>
#include <Common/ProfileEvents.h>
#include <common/StringRef.h>
#include <memory>
#include <string>
#include <vector>
#ifdef __SSSE3__
# include <hs.h>
#include <Common/config.h>
#if USE_HYPERSCAN
# if __has_include(<hs/hs.h>)
# include <hs/hs.h>
# else
# include <hs.h>
# endif
#endif
namespace ProfileEvents
@ -63,7 +67,7 @@ namespace Regexps
}
}
#ifdef __SSSE3__
#if USE_HYPERSCAN
namespace MultiRegexps
{
@ -139,6 +143,6 @@ namespace MultiRegexps
}
}
#endif // __SSSE3__
#endif // USE_HYPERSCAN
}

View File

@ -56,6 +56,7 @@ const char * auto_config_build[]
"USE_PROTOBUF", "@USE_PROTOBUF@",
"USE_BROTLI", "@USE_BROTLI@",
"USE_SSL", "@USE_SSL@",
"USE_HYPERSCAN", "@USE_HYPERSCAN@",
nullptr, nullptr
};

View File

@ -404,6 +404,8 @@ def main(args):
def find_binary(name):
if os.path.exists(name) and os.access(name, os.X_OK):
return True
paths = os.environ.get("PATH").split(':')
for path in paths:
if os.access(os.path.join(path, name), os.X_OK):
@ -416,7 +418,7 @@ if __name__ == '__main__':
parser=ArgumentParser(description='ClickHouse functional tests')
parser.add_argument('-q', '--queries', help='Path to queries dir')
parser.add_argument('--tmp', help='Path to tmp dir')
parser.add_argument('-b', '--binary', default='clickhouse', help='Main clickhouse binary')
parser.add_argument('-b', '--binary', default='clickhouse', help='Path to clickhouse binary or name of binary in PATH')
parser.add_argument('-c', '--client', help='Client program')
parser.add_argument('--extract_from_config', help='extract-from-config program')
parser.add_argument('--configclient', help='Client config (if you use not default ports)')

View File

@ -9,18 +9,18 @@ ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../.. && pwd)
DATA_DIR=${DATA_DIR:=`mktemp -d /tmp/clickhouse.test..XXXXX`}
DATA_DIR_PATTERN=${DATA_DIR_PATTERN:=/tmp/clickhouse} # path from config file, will be replaced to temporary
LOG_DIR=${LOG_DIR:=$DATA_DIR/log}
export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir
export CLICKHOUSE_BINARY_NAME=${CLICKHOUSE_BINARY_NAME:="clickhouse"}
( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir
[ -d "$ROOT_DIR/build${BUILD_TYPE}" ] && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR/build${BUILD_TYPE}}
BUILD_DIR=${BUILD_DIR:=$ROOT_DIR}
[ -x ${CLICKHOUSE_BINARY}-server" ] && [ -x ${CLICKHOUSE_BINARY}-client" ] && BIN_DIR= # Allow run in /usr/bin
( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY}-server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY} server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY}-client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY} client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY} extract-from-config}
[ -x ${CLICKHOUSE_BINARY_NAME}-server" ] && [ -x ${CLICKHOUSE_BINARY_NAME}-client" ] && BIN_DIR= # Allow run in /usr/bin
( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} extract-from-config}
[ -f "$CUR_DIR/server-test.xml" ] && CONFIG_DIR=${CONFIG_DIR=$CUR_DIR}/
CONFIG_CLIENT_DIR=${CONFIG_CLIENT_DIR=$CONFIG_DIR}
@ -131,7 +131,7 @@ else
TEST_DICT=${TEST_DICT=1}
CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q"
$CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;'
CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY_NAME} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
CLICKHOUSE_PERFORMANCE_TEST="${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --recursive $CUR_DIR/performance --skip-tags=long"
if [ "${TEST_RUN_STRESS}" ]; then
# Running test in parallel will fail some results (tests can create/fill/drop same tables)

View File

@ -128,16 +128,29 @@ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
For successful requests that don't return a data table, an empty response body is returned.
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special clickhouse-compressor program to work with it (it is installed with the clickhouse-client package).
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of the data insertion, you may disable the server-side checksum verification with the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
If you specified 'compress=1' in the URL, the server will compress the data it sends you.
If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method.
If you specified `compress = 1` in the URL, the server compresses the data it sends you.
If you specified `decompress = 1` in the URL, the server decompresses the same data that you pass in the `POST` method.
It is also possible to use the standard gzip-based HTTP compression. To send a POST request compressed using gzip, append the request header `Content-Encoding: gzip`.
In order for ClickHouse to compress the response using gzip, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse setting `enable_http_compression`.
It is also possible to use the standard `gzip`-based [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a `POST` request compressed using `gzip`, append the request header `Content-Encoding: gzip`.
In order for ClickHouse to compress the response using `gzip`, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the compression level of the data with the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting.
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
Examples of sending the data with compression:
```bash
#Sending the data to the server:
curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
#Sending the data to the client:
echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
!!! note "Note"
Some HTTP clients can decompress data (`gzip` and `deflate`) from the server by default and you may get the decompressed data even if you use the compression settings correctly.
You can use the 'database' URL parameter to specify the default database.
```bash

View File

@ -79,6 +79,41 @@ Enable or disable fsync when writing .sql files. Enabled by default.
It makes sense to disable it if the server has millions of tiny table chunks that are constantly being created and destroyed.
## enable_http_compression {#settings-enable_http_compression}
Enables/disables compression of the data in the response to an HTTP request.
For more information, read the [HTTP interface description](../../interfaces/http.md).
Possible values:
- 0 — The functionality is disabled.
- 1 — The functionality is enabled.
Default value: 0.
## http_zlib_compression_level {#settings-http_zlib_compression_level}
Sets the level of the compression of the data in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression).
Possible values: numbers from 1 to 9.
Default value: 3.
## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress}
Enables/disables the verification of the checksum when uncompressing the HTTP POST data from the client. Used only for ClickHouse native format of compression (neither `gzip` nor `deflate`).
For more information, read the [HTTP interface description](../../interfaces/http.md).
Possible values:
- 0 — The functionality is disabled.
- 1 — The functionality is enabled.
Default value: 0.
## input_format_allow_errors_num
Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.).

View File

@ -191,9 +191,7 @@ added dimensions.
In this case it makes sense to leave only a few columns in the primary key that will provide efficient
range scans and add the remaining dimension columns to the sorting key tuple.
[ALTER of the sorting key](../../query_language/alter.md) is a
lightweight operation because when a new column is simultaneously added to the table and to the sorting key
data parts need not be changed (they remain sorted by the new sorting key expression).
[ALTER of the sorting key](../../query_language/alter.md) is a lightweight operation because when a new column is simultaneously added to the table and to the sorting key, existing data parts don't need to be changed. Since the old sorting key is a prefix of the new sorting key and there is no data in the just added column, the data at the moment of table modification is sorted by both the old and the new sorting key.
### Use of Indexes and Partitions in Queries

View File

@ -189,7 +189,7 @@ ClickHouse не требует уникального первичного кл
В этом сценарии имеет смысл оставить в первичном ключе всего несколько столбцов, которые обеспечат эффективную
фильтрацию по индексу, а остальные столбцы-измерения добавить в выражение ключа сортировки.
[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и ключ сортировки не нужно изменять
[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и в ключ сортировки, не нужно изменять
данные кусков (они остаются упорядоченными и по новому выражению ключа).
### Использование индексов и партиций в запросах

View File

@ -22,5 +22,5 @@ env TEST_RUN=1 \
`# Use all possible contrib libs from system` \
`# psmisc - killall` \
`# gdb - symbol test in pbuilder` \
EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev $EXTRAPACKAGES" \
EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev $EXTRAPACKAGES" \
pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT